diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 5236e6c86c33..fdd1ad43a7b5 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -29,6 +29,7 @@ def MIMFMARead : SchedRead; // Vector ALU instructions def Write32Bit : SchedWrite; +def WriteFloatCvt : SchedWrite; def WriteQuarterRate32 : SchedWrite; def WriteFloatFMA : SchedWrite; @@ -126,6 +127,7 @@ multiclass SICommonWriteRes { def : HWVALUWriteRes; def : HWVALUWriteRes; + def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; @@ -185,6 +187,7 @@ let SchedModel = GFX10SpeedModel in { // The latency values are 1 / (operations / cycle). // Add 1 stall cycle for VGPR read. def : HWWriteRes; +def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 554612427efd..d39e698d9809 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -190,7 +190,7 @@ defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; } // End SchedRW = [WriteDoubleCvt] -let SchedRW = [WriteQuarterRate32] in { +let SchedRW = [WriteFloatCvt] in { defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; @@ -202,7 +202,7 @@ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; -} // End SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteFloatCvt] defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;