forked from OSchip/llvm-project
Teach BBVectorize to combine, when possible, or discard metadata when fusing instructions.
The present implementation handles only TBAA and FP metadata, discarding everything else. For debug metadata, the current behavior is maintained (the debug metadata associated with one of the instructions will be kept, discarding that attached to the other). This should address PR 13040. llvm-svn: 158606
This commit is contained in:
parent
16ddd4b66b
commit
fa103d3fc7
|
@ -23,6 +23,7 @@
|
||||||
#include "llvm/IntrinsicInst.h"
|
#include "llvm/IntrinsicInst.h"
|
||||||
#include "llvm/Intrinsics.h"
|
#include "llvm/Intrinsics.h"
|
||||||
#include "llvm/LLVMContext.h"
|
#include "llvm/LLVMContext.h"
|
||||||
|
#include "llvm/Metadata.h"
|
||||||
#include "llvm/Pass.h"
|
#include "llvm/Pass.h"
|
||||||
#include "llvm/Type.h"
|
#include "llvm/Type.h"
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
|
@ -303,6 +304,8 @@ namespace {
|
||||||
Instruction *&InsertionPt,
|
Instruction *&InsertionPt,
|
||||||
Instruction *I, Instruction *J);
|
Instruction *I, Instruction *J);
|
||||||
|
|
||||||
|
void combineMetadata(Instruction *K, const Instruction *J);
|
||||||
|
|
||||||
bool vectorizeBB(BasicBlock &BB) {
|
bool vectorizeBB(BasicBlock &BB) {
|
||||||
bool changed = false;
|
bool changed = false;
|
||||||
// Iterate a sufficient number of times to merge types of size 1 bit,
|
// Iterate a sufficient number of times to merge types of size 1 bit,
|
||||||
|
@ -1784,6 +1787,31 @@ namespace {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// When the first instruction in each pair is cloned, it will inherit its
|
||||||
|
// parent's metadata. This metadata must be combined with that of the other
|
||||||
|
// instruction in a safe way.
|
||||||
|
void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) {
|
||||||
|
SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
|
||||||
|
K->getAllMetadataOtherThanDebugLoc(Metadata);
|
||||||
|
for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
|
||||||
|
unsigned Kind = Metadata[i].first;
|
||||||
|
MDNode *JMD = J->getMetadata(Kind);
|
||||||
|
MDNode *KMD = Metadata[i].second;
|
||||||
|
|
||||||
|
switch (Kind) {
|
||||||
|
default:
|
||||||
|
K->setMetadata(Kind, 0); // Remove unknown metadata
|
||||||
|
break;
|
||||||
|
case LLVMContext::MD_tbaa:
|
||||||
|
K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
|
||||||
|
break;
|
||||||
|
case LLVMContext::MD_fpmath:
|
||||||
|
K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// This function fuses the chosen instruction pairs into vector instructions,
|
// This function fuses the chosen instruction pairs into vector instructions,
|
||||||
// taking care to preserve any needed scalar outputs and, then, it reorders the
|
// taking care to preserve any needed scalar outputs and, then, it reorders the
|
||||||
// remaining instructions as needed (users of the first member of the pair
|
// remaining instructions as needed (users of the first member of the pair
|
||||||
|
@ -1863,6 +1891,8 @@ namespace {
|
||||||
if (!isa<StoreInst>(K))
|
if (!isa<StoreInst>(K))
|
||||||
K->mutateType(getVecTypeForPair(I->getType()));
|
K->mutateType(getVecTypeForPair(I->getType()));
|
||||||
|
|
||||||
|
combineMetadata(K, J);
|
||||||
|
|
||||||
for (unsigned o = 0; o < NumOperands; ++o)
|
for (unsigned o = 0; o < NumOperands; ++o)
|
||||||
K->setOperand(o, ReplacedOperands[o]);
|
K->setOperand(o, ReplacedOperands[o]);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -S | FileCheck %s
|
||||||
|
|
||||||
|
; Simple 3-pair chain with loads and stores (with fpmath).
; The two fmul instructions below carry different !fpmath nodes: !2 is
; float 5.0 and !3 is float 2.5 (both defined at the end of this file).
; The FileCheck directives at the end of the function require that an
; !fpmath attachment still appears in the output for this function.
|
||||||
|
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
|
||||||
|
entry:
|
||||||
|
; First element: a[0] * b[0], tagged with !fpmath !2.
%i0 = load double* %a, align 8
|
||||||
|
%i1 = load double* %b, align 8
|
||||||
|
%mul = fmul double %i0, %i1, !fpmath !2
|
||||||
|
; Second element: a[1] * b[1], tagged with the different node !fpmath !3.
%arrayidx3 = getelementptr inbounds double* %a, i64 1
|
||||||
|
%i3 = load double* %arrayidx3, align 8
|
||||||
|
%arrayidx4 = getelementptr inbounds double* %b, i64 1
|
||||||
|
%i4 = load double* %arrayidx4, align 8
|
||||||
|
%mul5 = fmul double %i3, %i4, !fpmath !3
|
||||||
|
; Both products are written to adjacent slots c[0] and c[1].
store double %mul, double* %c, align 8
|
||||||
|
%arrayidx5 = getelementptr inbounds double* %c, i64 1
|
||||||
|
store double %mul5, double* %arrayidx5, align 8
|
||||||
|
ret void
|
||||||
|
; CHECK: @test1
|
||||||
|
; CHECK: !fpmath
|
||||||
|
; CHECK: ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Simple 3-pair chain with loads and stores (ints with range).
; The two loads from %a carry different !range nodes: !0 is {i64 0, i64 2}
; and !1 is {i64 3, i64 5} (both defined at the end of this file).
; The negative FileCheck directive at the end of the function requires that
; no !range attachment appears in the output between the function name and
; its return.
|
||||||
|
define void @test2(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
|
||||||
|
entry:
|
||||||
|
; First element: a[0] * b[0]; the load of a[0] is tagged with !range !0.
%i0 = load i64* %a, align 8, !range !0
|
||||||
|
%i1 = load i64* %b, align 8
|
||||||
|
%mul = mul i64 %i0, %i1
|
||||||
|
; Second element: a[1] * b[1]; the load of a[1] is tagged with !range !1.
%arrayidx3 = getelementptr inbounds i64* %a, i64 1
|
||||||
|
%i3 = load i64* %arrayidx3, align 8, !range !1
|
||||||
|
%arrayidx4 = getelementptr inbounds i64* %b, i64 1
|
||||||
|
%i4 = load i64* %arrayidx4, align 8
|
||||||
|
%mul5 = mul i64 %i3, %i4
|
||||||
|
; Both products are written to adjacent slots c[0] and c[1].
store i64 %mul, i64* %c, align 8
|
||||||
|
%arrayidx5 = getelementptr inbounds i64* %c, i64 1
|
||||||
|
store i64 %mul5, i64* %arrayidx5, align 8
|
||||||
|
ret void
|
||||||
|
; CHECK: @test2
|
||||||
|
; CHECK-NOT: !range
|
||||||
|
; CHECK: ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
!0 = metadata !{i64 0, i64 2}
|
||||||
|
!1 = metadata !{i64 3, i64 5}
|
||||||
|
|
||||||
|
!2 = metadata !{ float 5.0 }
|
||||||
|
!3 = metadata !{ float 2.5 }
|
||||||
|
|
Loading…
Reference in New Issue