forked from OSchip/llvm-project
R600/SI: Use correct dest register class for V_READFIRSTLANE_B32
This instruction writes to a 32-bit SGPR. This change required adding the 32-bit VCC_LO and VCC_HI registers, because the full VCC register is 64 bits. This fixes verifier errors on several of the indirect addressing piglit tests. Tested-by: Michel Dänzer <michel.daenzer@amd.com> llvm-svn: 204055
This commit is contained in:
parent
ca700e41ef
commit
fbe435de63
|
@ -210,7 +210,8 @@ void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
|
|||
continue;
|
||||
}
|
||||
unsigned reg = MO.getReg();
|
||||
if (reg == AMDGPU::VCC) {
|
||||
if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
|
||||
reg == AMDGPU::VCC_HI) {
|
||||
VCCUsed = true;
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -631,7 +631,18 @@ let neverHasSideEffects = 1, isMoveImm = 1 in {
|
|||
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
|
||||
} // End neverHasSideEffects = 1, isMoveImm = 1
|
||||
|
||||
defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
|
||||
let Uses = [EXEC] in {
|
||||
|
||||
def V_READFIRSTLANE_B32 : VOP1 <
|
||||
0x00000002,
|
||||
(outs SReg_32:$vdst),
|
||||
(ins VReg_32:$src0),
|
||||
"V_READFIRSTLANE_B32 $vdst, $src0",
|
||||
[]
|
||||
>;
|
||||
|
||||
}
|
||||
|
||||
defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
|
||||
[(set i32:$dst, (fp_to_sint f64:$src0))]
|
||||
>;
|
||||
|
|
|
@ -345,12 +345,13 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
|||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
// Read the next variant into VCC (lower 32 bits) <- also loop target
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
|
||||
AMDGPU::VCC_LO)
|
||||
.addReg(Idx);
|
||||
|
||||
// Move index from VCC into M0
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
|
||||
.addReg(AMDGPU::VCC);
|
||||
.addReg(AMDGPU::VCC_LO);
|
||||
|
||||
// Compare the just read M0 value to all possible Idx values
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
|
||||
|
|
|
@ -17,7 +17,16 @@ class SIReg <string n, bits<16> encoding = 0> : Register<n> {
|
|||
}
|
||||
|
||||
// Special Registers
|
||||
def VCC : SIReg<"VCC", 106>;
|
||||
def VCC_LO : SIReg<"vcc_lo", 106>;
|
||||
def VCC_HI : SIReg<"vcc_hi", 107>;
|
||||
|
||||
// VCC for 64-bit instructions
|
||||
def VCC : RegisterWithSubRegs<"VCC", [VCC_LO, VCC_HI]> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sub0, sub1];
|
||||
let HWEncoding = 106;
|
||||
}
|
||||
|
||||
def EXEC : SIReg<"EXEC", 126>;
|
||||
def SCC : SIReg<"SCC", 253>;
|
||||
def M0 : SIReg <"M0", 124>;
|
||||
|
@ -150,7 +159,7 @@ def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
|
|||
|
||||
// Register class for all scalar registers (SGPRs + Special Registers)
|
||||
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add SGPR_32, M0Reg)
|
||||
(add SGPR_32, M0Reg, VCC_LO)
|
||||
>;
|
||||
|
||||
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
|
||||
|
|
|
@ -13,10 +13,10 @@
|
|||
; R600-CHECK-NOT: ALU clause
|
||||
; R600-CHECK: 0 + AR.x
|
||||
|
||||
; SI-CHECK: V_READFIRSTLANE
|
||||
; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
|
||||
; SI-CHECK: V_MOVRELD
|
||||
; SI-CHECK: S_CBRANCH
|
||||
; SI-CHECK: V_READFIRSTLANE
|
||||
; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
|
||||
; SI-CHECK: V_MOVRELD
|
||||
; SI-CHECK: S_CBRANCH
|
||||
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
|
||||
|
|
Loading…
Reference in New Issue