forked from OSchip/llvm-project
R600/SI: Add a V_MOV_B64 pseudo instruction
This is used to simplify the SIFoldOperands pass and make it easier to fold immediates.

llvm-svn: 225373
This commit is contained in:
parent
290ece7d4c
commit
4842c05216
|
@ -86,6 +86,7 @@ static bool isSafeToFold(unsigned Opcode) {
|
|||
switch(Opcode) {
|
||||
case AMDGPU::V_MOV_B32_e32:
|
||||
case AMDGPU::V_MOV_B32_e64:
|
||||
case AMDGPU::V_MOV_B64_PSEUDO:
|
||||
case AMDGPU::S_MOV_B32:
|
||||
case AMDGPU::S_MOV_B64:
|
||||
case AMDGPU::COPY:
|
||||
|
|
|
@ -424,6 +424,8 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
|
|||
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
|
||||
} else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
|
||||
return AMDGPU::S_MOV_B64;
|
||||
} else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
|
||||
return AMDGPU::V_MOV_B64_PSEUDO;
|
||||
}
|
||||
return AMDGPU::COPY;
|
||||
}
|
||||
|
@ -672,6 +674,35 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
|
|||
// This is just a placeholder for register allocation.
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
|
||||
case AMDGPU::V_MOV_B64_PSEUDO: {
|
||||
unsigned Dst = MI->getOperand(0).getReg();
|
||||
unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
|
||||
unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
|
||||
|
||||
const MachineOperand &SrcOp = MI->getOperand(1);
|
||||
// FIXME: Will this work for 64-bit floating point immediates?
|
||||
assert(!SrcOp.isFPImm());
|
||||
if (SrcOp.isImm()) {
|
||||
APInt Imm(64, SrcOp.getImm());
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
|
||||
.addImm(Imm.getLoBits(32).getZExtValue())
|
||||
.addReg(Dst, RegState::Implicit);
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
|
||||
.addImm(Imm.getHiBits(32).getZExtValue())
|
||||
.addReg(Dst, RegState::Implicit);
|
||||
} else {
|
||||
assert(SrcOp.isReg());
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
|
||||
.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
|
||||
.addReg(Dst, RegState::Implicit);
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
|
||||
.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
|
||||
.addReg(Dst, RegState::Implicit);
|
||||
}
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1742,6 +1742,12 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
|
|||
//===----------------------------------------------------------------------===//
|
||||
let isCodeGenOnly = 1, isPseudo = 1 in {
|
||||
|
||||
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
|
||||
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
|
||||
// pass to enable folding of inline immediates.
|
||||
def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
|
||||
} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
|
||||
|
||||
let hasSideEffects = 1 in {
|
||||
def SGPR_USE : InstSI <(outs),(ins), "", []>;
|
||||
}
|
||||
|
|
|
@ -20,9 +20,8 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
|
|||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
|
@ -69,9 +68,8 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
|
|||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
|
|
|
@ -474,12 +474,9 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
|
|||
}
|
||||
|
||||
|
||||
; FIXME: These shouldn't bother materializing in SGPRs
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64
|
||||
; CHECK: s_mov_b64 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
|
||||
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
|
||||
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
|
||||
store double 0.0, double addrspace(1)* %out
|
||||
|
|
|
@ -30,9 +30,8 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
|||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
|
@ -45,9 +44,8 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
|
@ -87,9 +85,8 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
|
@ -277,10 +274,9 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
|||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
|
@ -290,9 +286,8 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
|
@ -327,9 +322,8 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
|
|
Loading…
Reference in New Issue