forked from OSchip/llvm-project
Revert "[AArch64] Set maximum VF with shouldMaximizeVectorBandwidth"
This reverts commit 64b6192e81.
The reverted commit broke the LLVM AArch64 buildbot clang-aarch64-sve-vls-2stage:
https://lab.llvm.org/buildbot/#/builders/176/builds/1515
llvm-tblgen crashes when built with the reverted commit applied.
This commit is contained in:
parent
32f3633171
commit
42ebfa8269
|
@ -937,8 +937,7 @@ public:
|
|||
/// creating vectors that span multiple vector registers.
|
||||
/// If false, the vectorization factor will be chosen based on the
|
||||
/// size of the widest element type.
|
||||
/// \p K Register Kind for vectorization.
|
||||
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
|
||||
bool shouldMaximizeVectorBandwidth() const;
|
||||
|
||||
/// \return The minimum vectorization factor for types of given element
|
||||
/// bit width, or 0 if there is no minimum VF. The returned value only
|
||||
|
@ -1630,8 +1629,7 @@ public:
|
|||
virtual unsigned getMinVectorRegisterBitWidth() const = 0;
|
||||
virtual Optional<unsigned> getMaxVScale() const = 0;
|
||||
virtual Optional<unsigned> getVScaleForTuning() const = 0;
|
||||
virtual bool
|
||||
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
|
||||
virtual bool shouldMaximizeVectorBandwidth() const = 0;
|
||||
virtual ElementCount getMinimumVF(unsigned ElemWidth,
|
||||
bool IsScalable) const = 0;
|
||||
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
|
||||
|
@ -2128,9 +2126,8 @@ public:
|
|||
Optional<unsigned> getVScaleForTuning() const override {
|
||||
return Impl.getVScaleForTuning();
|
||||
}
|
||||
bool shouldMaximizeVectorBandwidth(
|
||||
TargetTransformInfo::RegisterKind K) const override {
|
||||
return Impl.shouldMaximizeVectorBandwidth(K);
|
||||
bool shouldMaximizeVectorBandwidth() const override {
|
||||
return Impl.shouldMaximizeVectorBandwidth();
|
||||
}
|
||||
ElementCount getMinimumVF(unsigned ElemWidth,
|
||||
bool IsScalable) const override {
|
||||
|
|
|
@ -417,10 +417,7 @@ public:
|
|||
Optional<unsigned> getMaxVScale() const { return None; }
|
||||
Optional<unsigned> getVScaleForTuning() const { return None; }
|
||||
|
||||
bool
|
||||
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
|
||||
return false;
|
||||
}
|
||||
bool shouldMaximizeVectorBandwidth() const { return false; }
|
||||
|
||||
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
|
||||
return ElementCount::get(0, IsScalable);
|
||||
|
|
|
@ -626,9 +626,8 @@ Optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
|
|||
return TTIImpl->getVScaleForTuning();
|
||||
}
|
||||
|
||||
bool TargetTransformInfo::shouldMaximizeVectorBandwidth(
|
||||
TargetTransformInfo::RegisterKind K) const {
|
||||
return TTIImpl->shouldMaximizeVectorBandwidth(K);
|
||||
bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
|
||||
return TTIImpl->shouldMaximizeVectorBandwidth();
|
||||
}
|
||||
|
||||
ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth,
|
||||
|
|
|
@ -50,12 +50,6 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
|
|||
return (CallerBits & CalleeBits) == CalleeBits;
|
||||
}
|
||||
|
||||
bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
|
||||
TargetTransformInfo::RegisterKind K) const {
|
||||
assert(K != TargetTransformInfo::RGK_Scalar);
|
||||
return K == TargetTransformInfo::RGK_FixedWidthVector;
|
||||
}
|
||||
|
||||
/// Calculate the cost of materializing a 64-bit value. This helper
|
||||
/// method might only calculate a fraction of a larger immediate. Therefore it
|
||||
/// is valid to return a cost of ZERO.
|
||||
|
|
|
@ -135,8 +135,6 @@ public:
|
|||
return ST->getVScaleForTuning();
|
||||
}
|
||||
|
||||
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
|
||||
|
||||
/// Try to return an estimate cost factor that can be used as a multiplier
|
||||
/// when scalarizing an operation for a vector with ElementCount \p VF.
|
||||
/// For scalable vectors this currently takes the most pessimistic view based
|
||||
|
|
|
@ -86,11 +86,12 @@ public:
|
|||
unsigned getMinVectorRegisterBitWidth() const;
|
||||
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
|
||||
|
||||
bool
|
||||
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
|
||||
bool shouldMaximizeVectorBandwidth() const {
|
||||
return true;
|
||||
}
|
||||
bool supportsEfficientVectorElementLoadStore() { return false; }
|
||||
bool supportsEfficientVectorElementLoadStore() {
|
||||
return false;
|
||||
}
|
||||
bool hasBranchDivergence() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -5198,12 +5198,9 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
|
|||
return ElementCount::getFixed(ClampedConstTripCount);
|
||||
}
|
||||
|
||||
TargetTransformInfo::RegisterKind RegKind =
|
||||
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
|
||||
: TargetTransformInfo::RGK_FixedWidthVector;
|
||||
ElementCount MaxVF = MaxVectorElementCount;
|
||||
if (MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
|
||||
TTI.shouldMaximizeVectorBandwidth(RegKind))) {
|
||||
TTI.shouldMaximizeVectorBandwidth())) {
|
||||
auto MaxVectorElementCountMaxBW = ElementCount::get(
|
||||
PowerOf2Floor(WidestRegister.getKnownMinSize() / SmallestType),
|
||||
ComputeScalableMaxVF);
|
||||
|
|
|
@ -4,12 +4,11 @@
|
|||
; are not profitable.
|
||||
|
||||
; Test with a loop that contains memory accesses of i8 and i32 types. The
|
||||
; maximum VF for NEON is calculated by 128/size of smallest type in loop.
|
||||
; And while we don't have an instruction to load 4 x i8, vectorization
|
||||
; might still be profitable.
|
||||
; default maximum VF for NEON is 4. And while we don't have an instruction to
|
||||
; load 4 x i8, vectorization might still be profitable.
|
||||
define void @test_load_i8_store_i32(i8* noalias %src, i32* noalias %dst, i32 %off, i64 %N) {
|
||||
; CHECK-LABEL: @test_load_i8_store_i32(
|
||||
; CHECK: <16 x i8>
|
||||
; CHECK: <4 x i8>
|
||||
;
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -33,7 +32,7 @@ exit:
|
|||
; Same as test_load_i8_store_i32, but with types flipped for load and store.
|
||||
define void @test_load_i32_store_i8(i32* noalias %src, i8* noalias %dst, i32 %off, i64 %N) {
|
||||
; CHECK-LABEL: @test_load_i32_store_i8(
|
||||
; CHECK: <16 x i8>
|
||||
; CHECK: <4 x i8>
|
||||
;
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -85,7 +84,7 @@ exit:
|
|||
; vectorization factor.
|
||||
define void @test_load_i8_store_i64_large(i8* noalias %src, i64* noalias %dst, i64* noalias %dst.2, i64* noalias %dst.3, i64* noalias %dst.4, i64* noalias %dst.5, i64%off, i64 %off.2, i64 %N) {
|
||||
; CHECK-LABEL: @test_load_i8_store_i64_large
|
||||
; CHECK: <8 x i64>
|
||||
; CHECK: <2 x i64>
|
||||
;
|
||||
entry:
|
||||
br label %loop
|
||||
|
|
|
@ -116,9 +116,9 @@ for.body: ; preds = %entry, %for.body
|
|||
}
|
||||
|
||||
; CHECK-LABEL: @add_d(
|
||||
; CHECK: load <8 x i16>
|
||||
; CHECK: add nsw <8 x i32>
|
||||
; CHECK: store <8 x i32>
|
||||
; CHECK: load <4 x i16>
|
||||
; CHECK: add nsw <4 x i32>
|
||||
; CHECK: store <4 x i32>
|
||||
define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 {
|
||||
entry:
|
||||
%cmp7 = icmp sgt i32 %len, 0
|
||||
|
|
|
@ -123,16 +123,16 @@ for.body:
|
|||
; }
|
||||
;
|
||||
; CHECK: vector.body:
|
||||
; CHECK: phi <16 x i16>
|
||||
; CHECK: [[Ld1:%[a-zA-Z0-9.]+]] = load <16 x i8>
|
||||
; CHECK: zext <16 x i8> [[Ld1]] to <16 x i16>
|
||||
; CHECK: [[Ld2:%[a-zA-Z0-9.]+]] = load <16 x i8>
|
||||
; CHECK: zext <16 x i8> [[Ld2]] to <16 x i16>
|
||||
; CHECK: add <16 x i16>
|
||||
; CHECK: add <16 x i16>
|
||||
; CHECK: phi <8 x i16>
|
||||
; CHECK: [[Ld1:%[a-zA-Z0-9.]+]] = load <8 x i8>
|
||||
; CHECK: zext <8 x i8> [[Ld1]] to <8 x i16>
|
||||
; CHECK: [[Ld2:%[a-zA-Z0-9.]+]] = load <8 x i8>
|
||||
; CHECK: zext <8 x i8> [[Ld2]] to <8 x i16>
|
||||
; CHECK: add <8 x i16>
|
||||
; CHECK: add <8 x i16>
|
||||
;
|
||||
; CHECK: middle.block:
|
||||
; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
|
||||
; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16>
|
||||
; CHECK: zext i16 [[Rdx]] to i32
|
||||
;
|
||||
define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
; NEOVERSE-N2: LV: Vector loop of width vscale x 4 costs: 3 (assuming a minimum vscale of 1).
|
||||
|
||||
; VF-4: <4 x i32>
|
||||
; VF-VSCALE4: <16 x i32>
|
||||
; VF-VSCALE4: <vscale x 4 x i32>
|
||||
define void @test0(i32* %a, i8* %b, i32* %c) #0 {
|
||||
entry:
|
||||
br label %loop
|
||||
|
|
|
@ -9,9 +9,9 @@
|
|||
define void @test0(i32* %a, i8* %b, i32* %c) #0 {
|
||||
; CHECK: LV: Checking a loop in 'test0'
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16
|
||||
entry:
|
||||
|
@ -40,9 +40,9 @@ exit:
|
|||
define void @test1(i32* %a, i8* %b) #0 {
|
||||
; CHECK: LV: Checking a loop in 'test1'
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
|
||||
entry:
|
||||
|
@ -72,9 +72,9 @@ exit:
|
|||
define void @test2(i32* %a, i8* %b) #0 {
|
||||
; CHECK: LV: Checking a loop in 'test2'
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
|
||||
entry:
|
||||
|
@ -104,9 +104,9 @@ exit:
|
|||
define void @test3(i32* %a, i8* %b) #0 {
|
||||
; CHECK: LV: Checking a loop in 'test3'
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
|
||||
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
|
||||
entry:
|
||||
|
|
|
@ -83,11 +83,11 @@ for.end:
|
|||
define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) {
|
||||
; CHECK-LABEL: @uniform_store_i1
|
||||
; CHECK: vector.body
|
||||
; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <64 x i64*> {{.*}}, i64 1
|
||||
; CHECK: %[[ICMP:.*]] = icmp eq <64 x i64*> %[[GEP]], %[[SPLAT:.*]]
|
||||
; CHECK: %[[EXTRACT1:.*]] = extractelement <64 x i1> %[[ICMP]], i32 0
|
||||
; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <2 x i64*> {{.*}}, i64 1
|
||||
; CHECK: %[[ICMP:.*]] = icmp eq <2 x i64*> %[[GEP]], %[[SPLAT:.*]]
|
||||
; CHECK: %[[EXTRACT1:.*]] = extractelement <2 x i1> %[[ICMP]], i32 0
|
||||
; CHECK: store i1 %[[EXTRACT1]], i1* %dst
|
||||
; CHECK: %[[EXTRACT2:.*]] = extractelement <64 x i1> %[[ICMP]], i32 1
|
||||
; CHECK: %[[EXTRACT2:.*]] = extractelement <2 x i1> %[[ICMP]], i32 1
|
||||
; CHECK: store i1 %[[EXTRACT2]], i1* %dst
|
||||
; CHECK-NOT: vscale
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue