forked from OSchip/llvm-project
[CostModel][X86] Fix SSE1 FADD/FSUB costs
Noticed in D56011 - handle the case that scalar fp ops are quicker on P3 than P4. Add the other costs so that we're not relying on the default "is legal/custom" cost logic. llvm-svn: 350403
This commit is contained in:
parent
107dd2565c
commit
c2054144ee
|
@ -832,6 +832,12 @@ int X86TTIImpl::getArithmeticInstrCost(
|
|||
{ ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
|
||||
|
||||
{ ISD::FADD, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
|
||||
{ ISD::FADD, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
|
||||
|
||||
{ ISD::FSUB, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
|
||||
{ ISD::FSUB, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
|
||||
};
|
||||
|
||||
if (ST->hasSSE2())
|
||||
|
@ -841,6 +847,12 @@ int X86TTIImpl::getArithmeticInstrCost(
|
|||
static const CostTblEntry SSE1CostTable[] = {
|
||||
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
|
||||
|
||||
{ ISD::FADD, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
|
||||
{ ISD::FADD, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
|
||||
|
||||
{ ISD::FSUB, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
|
||||
{ ISD::FSUB, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
|
||||
};
|
||||
|
||||
if (ST->hasSSE1())
|
||||
|
|
|
@ -16,7 +16,7 @@ target triple = "x86_64-apple-macosx10.8.0"
|
|||
|
||||
define i32 @fadd(i32 %arg) {
|
||||
; SSE1-LABEL: 'fadd'
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> undef, undef
|
||||
|
@ -129,7 +129,7 @@ define i32 @fadd(i32 %arg) {
|
|||
|
||||
define i32 @fsub(i32 %arg) {
|
||||
; SSE1-LABEL: 'fsub'
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
|
||||
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> undef, undef
|
||||
|
|
Loading…
Reference in New Issue