forked from OSchip/llvm-project
Implemented a MED3_S32 GIR opcode.
This commit is contained in:
parent
6604d81e1b
commit
c781c25412
|
@ -175,6 +175,7 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
|
||||||
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
|
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
|
||||||
|
|
||||||
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
|
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
|
||||||
|
def : GINodeEquiv<G_AMDGPU_MED3_S32, AMDGPUsmed3>;
|
||||||
|
|
||||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
|
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
|
||||||
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
|
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
|
||||||
|
|
|
@ -152,9 +152,14 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
|
||||||
Register MedDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
Register MedDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||||
MRI.setType(MedDst, S32);
|
MRI.setType(MedDst, S32);
|
||||||
|
|
||||||
B.buildInstr(AMDGPU::V_MED3_I32,
|
Register CvtDst32 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||||
|
MRI.setType(CvtDst32, S32);
|
||||||
|
|
||||||
|
B.buildBitcast(CvtDst32, CvtDst);
|
||||||
|
|
||||||
|
B.buildInstr(AMDGPU::G_AMDGPU_MED3_S32,
|
||||||
{MedDst},
|
{MedDst},
|
||||||
{MinBoundaryDst.getReg(0), CvtDst, MaxBoundaryDst.getReg(0)},
|
{MinBoundaryDst.getReg(0), CvtDst32, MaxBoundaryDst.getReg(0)},
|
||||||
MI.getFlags());
|
MI.getFlags());
|
||||||
|
|
||||||
Register TruncDst = MRI.createGenericVirtualRegister(LLT::scalar(16));
|
Register TruncDst = MRI.createGenericVirtualRegister(LLT::scalar(16));
|
||||||
|
|
|
@ -3622,6 +3622,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
|
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
|
||||||
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
|
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
|
||||||
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
|
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
|
||||||
|
case AMDGPU::G_AMDGPU_MED3_S32:
|
||||||
return getDefaultMappingVOP(MI);
|
return getDefaultMappingVOP(MI);
|
||||||
case AMDGPU::G_UMULH:
|
case AMDGPU::G_UMULH:
|
||||||
case AMDGPU::G_SMULH: {
|
case AMDGPU::G_SMULH: {
|
||||||
|
|
|
@ -2581,6 +2581,12 @@ def G_AMDGPU_CVT_PK_I16_I32 : AMDGPUGenericInstruction {
|
||||||
let hasSideEffects = 0;
|
let hasSideEffects = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generic MED3_S32 opcode: one result ($dst) and three source operands
// ($src0, $src1, $src2), all constrained to the same type (type0). Declared
// as having no side effects.
// NOTE(review): presumably a signed 32-bit median-of-three, given the
// GINodeEquiv pairing with AMDGPUsmed3 in this change -- confirm.
def G_AMDGPU_MED3_S32 : AMDGPUGenericInstruction {
|
||||||
|
let OutOperandList = (outs type0:$dst);
|
||||||
|
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
|
||||||
|
let hasSideEffects = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
|
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
|
||||||
// operand. Expects a MachineMemOperand in addition to explicit
|
// operand. Expects a MachineMemOperand in addition to explicit
|
||||||
// operands.
|
// operands.
|
||||||
|
|
Loading…
Reference in New Issue