forked from OSchip/llvm-project
Add support for llvm.assume intrinsic to the LoadStoreVectorizer pass
Patch by Viacheslav Nikolaev. Thanks!
This commit is contained in:
parent
f0c9d1e95f
commit
9542721085
|
@ -49,6 +49,7 @@
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
#include "llvm/ADT/iterator_range.h"
|
#include "llvm/ADT/iterator_range.h"
|
||||||
#include "llvm/Analysis/AliasAnalysis.h"
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
|
#include "llvm/Analysis/AssumptionCache.h"
|
||||||
#include "llvm/Analysis/MemoryLocation.h"
|
#include "llvm/Analysis/MemoryLocation.h"
|
||||||
#include "llvm/Analysis/ScalarEvolution.h"
|
#include "llvm/Analysis/ScalarEvolution.h"
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||||
|
@ -111,6 +112,7 @@ using InstrListMap = MapVector<ChainID, InstrList>;
|
||||||
class Vectorizer {
|
class Vectorizer {
|
||||||
Function &F;
|
Function &F;
|
||||||
AliasAnalysis &AA;
|
AliasAnalysis &AA;
|
||||||
|
AssumptionCache &AC;
|
||||||
DominatorTree &DT;
|
DominatorTree &DT;
|
||||||
ScalarEvolution &SE;
|
ScalarEvolution &SE;
|
||||||
TargetTransformInfo &TTI;
|
TargetTransformInfo &TTI;
|
||||||
|
@ -118,9 +120,9 @@ class Vectorizer {
|
||||||
IRBuilder<> Builder;
|
IRBuilder<> Builder;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
|
Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
|
||||||
ScalarEvolution &SE, TargetTransformInfo &TTI)
|
DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
|
||||||
: F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
|
: F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
|
||||||
DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}
|
DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}
|
||||||
|
|
||||||
bool run();
|
bool run();
|
||||||
|
@ -205,6 +207,7 @@ public:
|
||||||
|
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||||
AU.addRequired<AAResultsWrapperPass>();
|
AU.addRequired<AAResultsWrapperPass>();
|
||||||
|
AU.addRequired<AssumptionCacheTracker>();
|
||||||
AU.addRequired<ScalarEvolutionWrapperPass>();
|
AU.addRequired<ScalarEvolutionWrapperPass>();
|
||||||
AU.addRequired<DominatorTreeWrapperPass>();
|
AU.addRequired<DominatorTreeWrapperPass>();
|
||||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||||
|
@ -219,6 +222,7 @@ char LoadStoreVectorizerLegacyPass::ID = 0;
|
||||||
INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
|
INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
|
||||||
"Vectorize load and Store instructions", false, false)
|
"Vectorize load and Store instructions", false, false)
|
||||||
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
|
||||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
|
||||||
|
@ -241,7 +245,10 @@ bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
|
||||||
TargetTransformInfo &TTI =
|
TargetTransformInfo &TTI =
|
||||||
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||||
|
|
||||||
Vectorizer V(F, AA, DT, SE, TTI);
|
AssumptionCache &AC =
|
||||||
|
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
||||||
|
|
||||||
|
Vectorizer V(F, AA, AC, DT, SE, TTI);
|
||||||
return V.run();
|
return V.run();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -254,8 +261,9 @@ PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, FunctionAnalysisMana
|
||||||
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
|
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
|
||||||
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
|
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
|
||||||
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
|
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
|
||||||
|
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
|
||||||
|
|
||||||
Vectorizer V(F, AA, DT, SE, TTI);
|
Vectorizer V(F, AA, AC, DT, SE, TTI);
|
||||||
bool Changed = V.run();
|
bool Changed = V.run();
|
||||||
PreservedAnalyses PA;
|
PreservedAnalyses PA;
|
||||||
PA.preserveSet<CFGAnalyses>();
|
PA.preserveSet<CFGAnalyses>();
|
||||||
|
@ -510,7 +518,7 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
|
||||||
if (!OpA)
|
if (!OpA)
|
||||||
return false;
|
return false;
|
||||||
KnownBits Known(BitWidth);
|
KnownBits Known(BitWidth);
|
||||||
computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
|
computeKnownBits(OpA, Known, DL, 0, &AC, OpA, &DT);
|
||||||
APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
|
APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
|
||||||
if (Signed)
|
if (Signed)
|
||||||
BitsAllowedToBeSet.clearBit(BitWidth - 1);
|
BitsAllowedToBeSet.clearBit(BitWidth - 1);
|
||||||
|
|
|
@ -104,8 +104,180 @@ bb:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare void @llvm.assume(i1)
|
||||||
|
|
||||||
|
define void @ld_v4i8_add_known_bits(i32 %ind0, i32 %ind1, i8* %src, <4 x i8>* %dst) {
|
||||||
|
; CHECK-LABEL: @ld_v4i8_add_known_bits(
|
||||||
|
; CHECK-NEXT: bb:
|
||||||
|
; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
|
||||||
|
; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 4
|
||||||
|
; CHECK-NEXT: [[TMP:%.*]] = add i32 [[V0]], -1
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[V1]], [[TMP]]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[TMP2]]
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[TMP3]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[TMP6]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[TMP7]] to <3 x i8>*
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i8>, <3 x i8>* [[TMP0]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <3 x i8> [[TMP1]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP132:%.*]] = extractelement <3 x i8> [[TMP1]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP183:%.*]] = extractelement <3 x i8> [[TMP1]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
|
||||||
|
; CHECK-NEXT: store <4 x i8> [[TMP22]], <4 x i8>* [[DST:%.*]]
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
bb:
|
||||||
|
%v0 = mul i32 %ind0, 4
|
||||||
|
%v1 = mul i32 %ind1, 4
|
||||||
|
%tmp = add i32 %v0, -1
|
||||||
|
%tmp1 = add i32 %v1, %tmp
|
||||||
|
%tmp2 = sext i32 %tmp1 to i64
|
||||||
|
%tmp3 = getelementptr inbounds i8, i8* %src, i64 %tmp2
|
||||||
|
%tmp4 = load i8, i8* %tmp3, align 1
|
||||||
|
%tmp5 = add i32 %v1, %v0
|
||||||
|
%tmp6 = sext i32 %tmp5 to i64
|
||||||
|
%tmp7 = getelementptr inbounds i8, i8* %src, i64 %tmp6
|
||||||
|
%tmp8 = load i8, i8* %tmp7, align 1
|
||||||
|
%tmp9 = add i32 %v0, 1
|
||||||
|
%tmp10 = add i32 %v1, %tmp9
|
||||||
|
%tmp11 = sext i32 %tmp10 to i64
|
||||||
|
%tmp12 = getelementptr inbounds i8, i8* %src, i64 %tmp11
|
||||||
|
%tmp13 = load i8, i8* %tmp12, align 1
|
||||||
|
%tmp14 = add i32 %v0, 2
|
||||||
|
%tmp15 = add i32 %v1, %tmp14
|
||||||
|
%tmp16 = sext i32 %tmp15 to i64
|
||||||
|
%tmp17 = getelementptr inbounds i8, i8* %src, i64 %tmp16
|
||||||
|
%tmp18 = load i8, i8* %tmp17, align 1
|
||||||
|
%tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
|
||||||
|
%tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
|
||||||
|
%tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
|
||||||
|
%tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
|
||||||
|
store <4 x i8> %tmp22, <4 x i8>* %dst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @ld_v4i8_add_known_bits1(i32 %ind0, i32 %ind1, i8* %src, <4 x i8>* %dst) {
|
||||||
|
; CHECK-LABEL: @ld_v4i8_add_known_bits1(
|
||||||
|
; CHECK-NEXT: bb:
|
||||||
|
; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
|
||||||
|
; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 4
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[TMP6]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP132:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
|
||||||
|
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
|
||||||
|
; CHECK-NEXT: store <4 x i8> [[TMP22]], <4 x i8>* [[DST:%.*]]
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
bb:
|
||||||
|
%v0 = mul i32 %ind0, 4
|
||||||
|
%v1 = mul i32 %ind1, 4
|
||||||
|
%tmp = add i32 %v0, 3
|
||||||
|
%tmp1 = add i32 %v1, %tmp
|
||||||
|
%tmp2 = sext i32 %tmp1 to i64
|
||||||
|
%tmp3 = getelementptr inbounds i8, i8* %src, i64 %tmp2
|
||||||
|
%tmp4 = load i8, i8* %tmp3, align 1
|
||||||
|
%tmp5 = add i32 %v1, %v0
|
||||||
|
%tmp6 = sext i32 %tmp5 to i64
|
||||||
|
%tmp7 = getelementptr inbounds i8, i8* %src, i64 %tmp6
|
||||||
|
%tmp8 = load i8, i8* %tmp7, align 1
|
||||||
|
%tmp9 = add i32 %v0, 1
|
||||||
|
%tmp10 = add i32 %v1, %tmp9
|
||||||
|
%tmp11 = sext i32 %tmp10 to i64
|
||||||
|
%tmp12 = getelementptr inbounds i8, i8* %src, i64 %tmp11
|
||||||
|
%tmp13 = load i8, i8* %tmp12, align 1
|
||||||
|
%tmp14 = add i32 %v0, 2
|
||||||
|
%tmp15 = add i32 %v1, %tmp14
|
||||||
|
%tmp16 = sext i32 %tmp15 to i64
|
||||||
|
%tmp17 = getelementptr inbounds i8, i8* %src, i64 %tmp16
|
||||||
|
%tmp18 = load i8, i8* %tmp17, align 1
|
||||||
|
%tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
|
||||||
|
%tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
|
||||||
|
%tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
|
||||||
|
%tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
|
||||||
|
store <4 x i8> %tmp22, <4 x i8>* %dst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @ld_v4i8_add_known_bits_by_assume(i32 %ind0, i32 %ind1, i8* %src, <4 x i8>* %dst) {
|
||||||
|
; CHECK-LABEL: @ld_v4i8_add_known_bits_by_assume(
|
||||||
|
; CHECK-NEXT: bb:
|
||||||
|
; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 3
|
||||||
|
; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
|
||||||
|
; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[V0]], 3
|
||||||
|
; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[AND_I]], 0
|
||||||
|
; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[V1]], 3
|
||||||
|
; CHECK-NEXT: [[CMP_I_1:%.*]] = icmp eq i32 [[AND_I_1]], 0
|
||||||
|
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I]])
|
||||||
|
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I_1]])
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[TMP6]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP132:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
|
||||||
|
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
|
||||||
|
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
|
||||||
|
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
|
||||||
|
; CHECK-NEXT: store <4 x i8> [[TMP22]], <4 x i8>* [[DST:%.*]]
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
bb:
|
||||||
|
%v0 = mul i32 %ind0, 3
|
||||||
|
%v1 = mul i32 %ind1, 3
|
||||||
|
%and.i = and i32 %v0, 3
|
||||||
|
%cmp.i = icmp eq i32 %and.i, 0
|
||||||
|
%and.i.1 = and i32 %v1, 3
|
||||||
|
%cmp.i.1 = icmp eq i32 %and.i.1, 0
|
||||||
|
call void @llvm.assume(i1 %cmp.i)
|
||||||
|
call void @llvm.assume(i1 %cmp.i.1)
|
||||||
|
%tmp = add i32 %v0, 3
|
||||||
|
%tmp1 = add i32 %v1, %tmp
|
||||||
|
%tmp2 = sext i32 %tmp1 to i64
|
||||||
|
%tmp3 = getelementptr inbounds i8, i8* %src, i64 %tmp2
|
||||||
|
%tmp4 = load i8, i8* %tmp3, align 1
|
||||||
|
%tmp5 = add i32 %v1, %v0
|
||||||
|
%tmp6 = sext i32 %tmp5 to i64
|
||||||
|
%tmp7 = getelementptr inbounds i8, i8* %src, i64 %tmp6
|
||||||
|
%tmp8 = load i8, i8* %tmp7, align 1
|
||||||
|
%tmp9 = add i32 %v0, 1
|
||||||
|
%tmp10 = add i32 %v1, %tmp9
|
||||||
|
%tmp11 = sext i32 %tmp10 to i64
|
||||||
|
%tmp12 = getelementptr inbounds i8, i8* %src, i64 %tmp11
|
||||||
|
%tmp13 = load i8, i8* %tmp12, align 1
|
||||||
|
%tmp14 = add i32 %v0, 2
|
||||||
|
%tmp15 = add i32 %v1, %tmp14
|
||||||
|
%tmp16 = sext i32 %tmp15 to i64
|
||||||
|
%tmp17 = getelementptr inbounds i8, i8* %src, i64 %tmp16
|
||||||
|
%tmp18 = load i8, i8* %tmp17, align 1
|
||||||
|
%tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
|
||||||
|
%tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
|
||||||
|
%tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
|
||||||
|
%tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
|
||||||
|
store <4 x i8> %tmp22, <4 x i8>* %dst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; Make sure we don't vectorize the loads below because the source of
|
; Make sure we don't vectorize the loads below because the source of
|
||||||
; sext instructions doesn't have the nsw flag.
|
; sext instructions doesn't have the nsw flag or known bits that would
|
||||||
|
; allow the vectorization to be applied.
|
||||||
|
|
||||||
define void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, i8* %src, <4 x i8>* %dst) {
|
define void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, i8* %src, <4 x i8>* %dst) {
|
||||||
; CHECK-LABEL: @ld_v4i8_add_not_safe(
|
; CHECK-LABEL: @ld_v4i8_add_not_safe(
|
||||||
|
|
Loading…
Reference in New Issue