forked from OSchip/llvm-project
Revert 224119 "This patch recognizes (+ (+ v0, v1) (+ v2, v3)), reorders them for bundling into vector of loads,
and vectorizes it." The patch reordered floating-point operands, which resulted in mismatched output. llvm-svn: 224424
This commit is contained in:
parent
372deb091e
commit
43fae93da8
|
@ -439,13 +439,6 @@ public:
|
||||||
/// \returns true if the memory operations A and B are consecutive.
|
/// \returns true if the memory operations A and B are consecutive.
|
||||||
bool isConsecutiveAccess(Value *A, Value *B);
|
bool isConsecutiveAccess(Value *A, Value *B);
|
||||||
|
|
||||||
/// For consecutive loads (+(+ v0, v1)(+ v2, v3)), Left had v0 and v2
|
|
||||||
/// while Right had v1 and v3, which prevented bundling them into
|
|
||||||
/// a vector of loads. Reorder them so that Left now has v0 and v1
|
|
||||||
/// while Right has v2 and v3 enabling their bundling into a vector.
|
|
||||||
void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
|
|
||||||
SmallVectorImpl<Value *> &Right);
|
|
||||||
|
|
||||||
/// \brief Perform LICM and CSE on the newly generated gather sequences.
|
/// \brief Perform LICM and CSE on the newly generated gather sequences.
|
||||||
void optimizeGatherSequence();
|
void optimizeGatherSequence();
|
||||||
|
|
||||||
|
@ -1241,7 +1234,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||||
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
|
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
|
||||||
ValueList Left, Right;
|
ValueList Left, Right;
|
||||||
reorderInputsAccordingToOpcode(VL, Left, Right);
|
reorderInputsAccordingToOpcode(VL, Left, Right);
|
||||||
reorderIfConsecutiveLoads (Left, Right);
|
|
||||||
buildTree_rec(Left, Depth + 1);
|
buildTree_rec(Left, Depth + 1);
|
||||||
buildTree_rec(Right, Depth + 1);
|
buildTree_rec(Right, Depth + 1);
|
||||||
return;
|
return;
|
||||||
|
@ -1826,19 +1818,6 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
|
||||||
return X == PtrSCEVB;
|
return X == PtrSCEVB;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rearranges the two operand lists in place: for each index i (except the
// last), when Left[i], Right[i] and Left[i + 1] are reported by
// isConsecutiveAccess() as a consecutive chain, Left[i + 1] and Right[i]
// are exchanged. Processing stops at the first index where either operand
// is not a LoadInst.
//
// \param Left  first operand list; modified in place.
// \param Right second operand list; modified in place and indexed with the
//              same indices as Left.
//
// NOTE(review): the swap exchanges elements that sit at different indices
// (i + 1 in Left, i in Right). The commit message of the revert attributes a
// floating-point output mismatch to this reordering -- confirm before reuse.
// NOTE(review): the loop bound `e - 1` is computed in unsigned arithmetic, so
// an empty Left would wrap around; presumably callers never pass empty lists
// -- verify against callers.
void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
|
|
||||||
SmallVectorImpl<Value *> &Right) {
|
|
||||||
// Visit every index that has a successor, since Left[i + 1] is read below.
for (unsigned i = 0, e = Left.size(); i < e - 1; ++i) {
|
|
||||||
// Give up entirely (not just skip this index) as soon as either side is
// not a load.
if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i]))
|
|
||||||
return;
|
|
||||||
// Swap only when both adjacency checks hold: Left[i] -> Right[i] and
// Right[i] -> Left[i + 1]; otherwise move on to the next index.
if (!(isConsecutiveAccess(Left[i], Right[i]) &&
|
|
||||||
isConsecutiveAccess(Right[i], Left[i + 1])))
|
|
||||||
continue;
|
|
||||||
else
|
|
||||||
std::swap(Left[i + 1], Right[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
|
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
|
||||||
Instruction *VL0 = cast<Instruction>(VL[0]);
|
Instruction *VL0 = cast<Instruction>(VL[0]);
|
||||||
BasicBlock::iterator NextInst = VL0;
|
BasicBlock::iterator NextInst = VL0;
|
||||||
|
@ -2069,10 +2048,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
case Instruction::Or:
|
case Instruction::Or:
|
||||||
case Instruction::Xor: {
|
case Instruction::Xor: {
|
||||||
ValueList LHSVL, RHSVL;
|
ValueList LHSVL, RHSVL;
|
||||||
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
|
if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
|
||||||
reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
|
reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
|
||||||
reorderIfConsecutiveLoads(LHSVL, RHSVL);
|
else
|
||||||
} else
|
|
||||||
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
|
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
|
||||||
LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
|
LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
|
||||||
RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
|
RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
|
|
||||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
|
||||||
target triple = "aarch64--linux-gnu"
|
|
||||||
|
|
||||||
; float hadd (float *a) {
|
|
||||||
; return (a[0] + a[1]) + (a[2] + a[3]);
|
|
||||||
; }
|
|
||||||
|
|
||||||
; CHECK-LABEL: @hadd
|
|
||||||
; CHECK: load <2 x float>*
|
|
||||||
; CHECK: fadd <2 x float>
|
|
||||||
; CHECK: extractelement <2 x float>
|
|
||||||
|
|
||||||
define float @hadd(float* nocapture readonly %a) {
|
|
||||||
entry:
|
|
||||||
%0 = load float* %a, align 4
|
|
||||||
%arrayidx1 = getelementptr inbounds float* %a, i64 1
|
|
||||||
%1 = load float* %arrayidx1, align 4
|
|
||||||
%add = fadd float %0, %1
|
|
||||||
%arrayidx2 = getelementptr inbounds float* %a, i64 2
|
|
||||||
%2 = load float* %arrayidx2, align 4
|
|
||||||
%arrayidx3 = getelementptr inbounds float* %a, i64 3
|
|
||||||
%3 = load float* %arrayidx3, align 4
|
|
||||||
%add4 = fadd float %2, %3
|
|
||||||
%add5 = fadd float %add, %add4
|
|
||||||
ret float %add5
|
|
||||||
}
|
|
Loading…
Reference in New Issue