forked from OSchip/llvm-project
X86TTI: Add accurate costs for integer-to-floating-point conversions (sitofp/uitofp), based on the actual instruction counts.
llvm-svn: 178459
This commit is contained in:
parent
e624795558
commit
52ceb44331
|
@ -271,10 +271,33 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
|
|||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
|
||||
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
|
||||
|
|
|
@ -77,3 +77,78 @@ define i32 @masks4(<4 x i1> %in) {
|
|||
ret i32 undef
|
||||
}
|
||||
|
||||
; Cost expectations for sitofp on 4-element vectors. Each source type
; (<4 x i1>, <4 x i8>, <4 x i16>, <4 x i32>) is converted to both <4 x float>
; and <4 x double>. The expected costs are listed in the same order as the
; conversions: 3 for the i1, i8 and i16 sources, 1 for the i32 source.
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
||||
; CHECK: cost of 3 {{.*}} sitofp
|
||||
%A1 = sitofp <4 x i1> %a to <4 x float>
|
||||
; CHECK: cost of 3 {{.*}} sitofp
|
||||
%A2 = sitofp <4 x i1> %a to <4 x double>
|
||||
|
||||
; CHECK: cost of 3 {{.*}} sitofp
|
||||
%B1 = sitofp <4 x i8> %b to <4 x float>
|
||||
; CHECK: cost of 3 {{.*}} sitofp
|
||||
%B2 = sitofp <4 x i8> %b to <4 x double>
|
||||
|
||||
; CHECK: cost of 3 {{.*}} sitofp
|
||||
%C1 = sitofp <4 x i16> %c to <4 x float>
|
||||
; CHECK: cost of 3 {{.*}} sitofp
|
||||
%C2 = sitofp <4 x i16> %c to <4 x double>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} sitofp
|
||||
%D1 = sitofp <4 x i32> %d to <4 x float>
|
||||
; CHECK: cost of 1 {{.*}} sitofp
|
||||
%D2 = sitofp <4 x i32> %d to <4 x double>
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost expectations for sitofp from 8-element integer vectors to <8 x float>.
; Expected costs, in the same order as the conversions:
; 8 (<8 x i1>), 8 (<8 x i8>), 5 (<8 x i16>), 1 (<8 x i32>).
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
||||
; CHECK: cost of 8 {{.*}} sitofp
|
||||
%A1 = sitofp <8 x i1> %a to <8 x float>
|
||||
|
||||
; CHECK: cost of 8 {{.*}} sitofp
|
||||
%B1 = sitofp <8 x i8> %b to <8 x float>
|
||||
|
||||
; CHECK: cost of 5 {{.*}} sitofp
|
||||
%C1 = sitofp <8 x i16> %c to <8 x float>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} sitofp
|
||||
%D1 = sitofp <8 x i32> %d to <8 x float>
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost expectations for uitofp on 4-element vectors. Each source type
; (<4 x i1>, <4 x i8>, <4 x i16>, <4 x i32>) is converted to both <4 x float>
; and <4 x double>. The expected costs are listed in the same order as the
; conversions: 7 for the i1 source, 2 for the i8 and i16 sources, 6 for the
; i32 source.
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
||||
; CHECK: cost of 7 {{.*}} uitofp
|
||||
%A1 = uitofp <4 x i1> %a to <4 x float>
|
||||
; CHECK: cost of 7 {{.*}} uitofp
|
||||
%A2 = uitofp <4 x i1> %a to <4 x double>
|
||||
|
||||
; CHECK: cost of 2 {{.*}} uitofp
|
||||
%B1 = uitofp <4 x i8> %b to <4 x float>
|
||||
; CHECK: cost of 2 {{.*}} uitofp
|
||||
%B2 = uitofp <4 x i8> %b to <4 x double>
|
||||
|
||||
; CHECK: cost of 2 {{.*}} uitofp
|
||||
%C1 = uitofp <4 x i16> %c to <4 x float>
|
||||
; CHECK: cost of 2 {{.*}} uitofp
|
||||
%C2 = uitofp <4 x i16> %c to <4 x double>
|
||||
|
||||
; CHECK: cost of 6 {{.*}} uitofp
|
||||
%D1 = uitofp <4 x i32> %d to <4 x float>
|
||||
; CHECK: cost of 6 {{.*}} uitofp
|
||||
%D2 = uitofp <4 x i32> %d to <4 x double>
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost expectations for uitofp from 8-element integer vectors to <8 x float>.
; Expected costs, in the same order as the conversions:
; 6 (<8 x i1>), 5 (<8 x i8>), 5 (<8 x i16>), 9 (<8 x i32>).
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
||||
; CHECK: cost of 6 {{.*}} uitofp
|
||||
%A1 = uitofp <8 x i1> %a to <8 x float>
|
||||
|
||||
; CHECK: cost of 5 {{.*}} uitofp
|
||||
%B1 = uitofp <8 x i8> %b to <8 x float>
|
||||
|
||||
; CHECK: cost of 5 {{.*}} uitofp
|
||||
%C1 = uitofp <8 x i16> %c to <8 x float>
|
||||
|
||||
; CHECK: cost of 9 {{.*}} uitofp
|
||||
%D1 = uitofp <8 x i32> %d to <8 x float>
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun
|
|||
|
||||
.lr.ph: ; preds = %0, %.lr.ph
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
|
||||
%2 = add nsw i64 %indvars.iv, 3
|
||||
%3 = trunc i64 %2 to i32
|
||||
%4 = sitofp i32 %3 to float
|
||||
%5 = getelementptr inbounds float* %B, i64 %indvars.iv
|
||||
store float %4, float* %5, align 4
|
||||
%add = add nsw i64 %indvars.iv, 3
|
||||
%tofp = sitofp i64 %add to float
|
||||
%gep = getelementptr inbounds float* %B, i64 %indvars.iv
|
||||
store float %tofp, float* %gep, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
|
|
Loading…
Reference in New Issue