forked from OSchip/llvm-project
R600: Consolidate sub register indices.
Use sub0-15 everywhere. Patch by: Michel Dänzer Reviewed-by: Tom Stellard <thomas.stellard@amd.com> Signed-off-by: Michel Dänzer <michel.daenzer@amd.com> llvm-svn: 174610
This commit is contained in:
parent
e06163a9a6
commit
9355b22180
|
@ -196,8 +196,8 @@ class Vector_Build <ValueType vecType, RegisterClass vectorClass,
|
|||
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
|
||||
(elemType elemClass:$z), (elemType elemClass:$w))),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||
(vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
|
||||
elemClass:$z, sel_z), elemClass:$w, sel_w)
|
||||
(vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
|
||||
elemClass:$z, sub2), elemClass:$w, sub3)
|
||||
>;
|
||||
|
||||
// bitconvert pattern
|
||||
|
|
|
@ -51,22 +51,22 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
|
|||
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
|
||||
|
||||
switch(IndirectIndex) {
|
||||
case 0: return AMDGPU::indirect_0;
|
||||
case 1: return AMDGPU::indirect_1;
|
||||
case 2: return AMDGPU::indirect_2;
|
||||
case 3: return AMDGPU::indirect_3;
|
||||
case 4: return AMDGPU::indirect_4;
|
||||
case 5: return AMDGPU::indirect_5;
|
||||
case 6: return AMDGPU::indirect_6;
|
||||
case 7: return AMDGPU::indirect_7;
|
||||
case 8: return AMDGPU::indirect_8;
|
||||
case 9: return AMDGPU::indirect_9;
|
||||
case 10: return AMDGPU::indirect_10;
|
||||
case 11: return AMDGPU::indirect_11;
|
||||
case 12: return AMDGPU::indirect_12;
|
||||
case 13: return AMDGPU::indirect_13;
|
||||
case 14: return AMDGPU::indirect_14;
|
||||
case 15: return AMDGPU::indirect_15;
|
||||
case 0: return AMDGPU::sub0;
|
||||
case 1: return AMDGPU::sub1;
|
||||
case 2: return AMDGPU::sub2;
|
||||
case 3: return AMDGPU::sub3;
|
||||
case 4: return AMDGPU::sub4;
|
||||
case 5: return AMDGPU::sub5;
|
||||
case 6: return AMDGPU::sub6;
|
||||
case 7: return AMDGPU::sub7;
|
||||
case 8: return AMDGPU::sub8;
|
||||
case 9: return AMDGPU::sub9;
|
||||
case 10: return AMDGPU::sub10;
|
||||
case 11: return AMDGPU::sub11;
|
||||
case 12: return AMDGPU::sub12;
|
||||
case 13: return AMDGPU::sub13;
|
||||
case 14: return AMDGPU::sub14;
|
||||
case 15: return AMDGPU::sub15;
|
||||
default: llvm_unreachable("indirect index out of range");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,14 +12,9 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Namespace = "AMDGPU" in {
|
||||
def sel_x : SubRegIndex;
|
||||
def sel_y : SubRegIndex;
|
||||
def sel_z : SubRegIndex;
|
||||
def sel_w : SubRegIndex;
|
||||
|
||||
|
||||
foreach Index = 0-15 in {
|
||||
def indirect_#Index : SubRegIndex;
|
||||
def sub#Index : SubRegIndex;
|
||||
}
|
||||
|
||||
def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
|
||||
|
|
|
@ -578,13 +578,13 @@ class ExportBufWord1 {
|
|||
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
|
||||
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
|
||||
(ExportInst
|
||||
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
|
||||
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
|
||||
0, 61, 0, 7, 7, 7, cf_inst, 0)
|
||||
>;
|
||||
|
||||
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
|
||||
(ExportInst
|
||||
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
|
||||
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
|
||||
0, 61, 7, 0, 7, 7, cf_inst, 0)
|
||||
>;
|
||||
|
||||
|
@ -1868,25 +1868,25 @@ def : Pat <
|
|||
(SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
|
||||
>;
|
||||
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
|
||||
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
|
||||
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
|
||||
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
|
||||
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
|
||||
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
|
||||
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
|
||||
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
|
||||
|
||||
def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
|
||||
def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
|
||||
|
|
|
@ -84,10 +84,10 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
|
|||
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
|
||||
switch (Channel) {
|
||||
default: assert(!"Invalid channel index"); return 0;
|
||||
case 0: return AMDGPU::sel_x;
|
||||
case 1: return AMDGPU::sel_y;
|
||||
case 2: return AMDGPU::sel_z;
|
||||
case 3: return AMDGPU::sel_w;
|
||||
case 0: return AMDGPU::sub0;
|
||||
case 1: return AMDGPU::sub1;
|
||||
case 2: return AMDGPU::sub2;
|
||||
case 3: return AMDGPU::sub3;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ class R600RegWithChan <string name, bits<9> sel, string chan> :
|
|||
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
|
||||
RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
||||
let SubRegIndices = [sub0, sub1, sub2, sub3];
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
|
@ -126,9 +126,8 @@ class IndirectSuper<string n, list<Register> subregs> :
|
|||
RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices =
|
||||
[indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
|
||||
indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
|
||||
indirect_13,indirect_14,indirect_15];
|
||||
[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
|
||||
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
|
||||
}
|
||||
|
||||
def IndirectSuperReg : IndirectSuper<"Indirect",
|
||||
|
|
|
@ -1215,15 +1215,15 @@ def CLAMP_SI : CLAMP<VReg_32>;
|
|||
def FABS_SI : FABS<VReg_32>;
|
||||
def FNEG_SI : FNEG<VReg_32>;
|
||||
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
|
||||
def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
|
||||
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
|
||||
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
|
||||
|
||||
def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
|
||||
def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
|
||||
|
@ -1338,22 +1338,22 @@ def : Pat <
|
|||
def : Pat <
|
||||
(int_AMDGPU_cube VReg_128:$src),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
|
||||
(V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_y),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_z),
|
||||
0, 0, 0, 0), sel_x),
|
||||
(V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_y),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_z),
|
||||
0, 0, 0, 0), sel_y),
|
||||
(V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_y),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_z),
|
||||
0, 0, 0, 0), sel_z),
|
||||
(V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_y),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sel_z),
|
||||
0, 0, 0, 0), sel_w)
|
||||
(V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub2),
|
||||
0, 0, 0, 0), sub0),
|
||||
(V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub2),
|
||||
0, 0, 0, 0), sub1),
|
||||
(V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub2),
|
||||
0, 0, 0, 0), sub2),
|
||||
(V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub1),
|
||||
(EXTRACT_SUBREG VReg_128:$src, sub2),
|
||||
0, 0, 0, 0), sub3)
|
||||
>;
|
||||
|
||||
/********** ================== **********/
|
||||
|
|
|
@ -1,18 +1,4 @@
|
|||
|
||||
let Namespace = "AMDGPU" in {
|
||||
def low : SubRegIndex;
|
||||
def high : SubRegIndex;
|
||||
|
||||
def sub0 : SubRegIndex;
|
||||
def sub1 : SubRegIndex;
|
||||
def sub2 : SubRegIndex;
|
||||
def sub3 : SubRegIndex;
|
||||
def sub4 : SubRegIndex;
|
||||
def sub5 : SubRegIndex;
|
||||
def sub6 : SubRegIndex;
|
||||
def sub7 : SubRegIndex;
|
||||
}
|
||||
|
||||
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
|
||||
let Namespace = "AMDGPU";
|
||||
let HWEncoding = encoding;
|
||||
|
@ -20,7 +6,7 @@ class SIReg <string n, bits<16> encoding = 0> : Register<n> {
|
|||
|
||||
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [low, high];
|
||||
let SubRegIndices = [sub0, sub1];
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
|
@ -73,12 +59,12 @@ def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
|||
(add (sequence "SGPR%u", 0, 101))>;
|
||||
|
||||
// SGPR 64-bit registers
|
||||
def SGPR_64 : RegisterTuples<[low, high],
|
||||
def SGPR_64 : RegisterTuples<[sub0, sub1],
|
||||
[(add (decimate SGPR_32, 2)),
|
||||
(add(decimate (rotl SGPR_32, 1), 2))]>;
|
||||
|
||||
// SGPR 128-bit registers
|
||||
def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
|
||||
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
|
||||
[(add (decimate SGPR_32, 4)),
|
||||
(add (decimate (rotl SGPR_32, 1), 4)),
|
||||
(add (decimate (rotl SGPR_32, 2), 4)),
|
||||
|
@ -104,12 +90,12 @@ def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
|||
(add (sequence "VGPR%u", 0, 255))>;
|
||||
|
||||
// VGPR 64-bit registers
|
||||
def VGPR_64 : RegisterTuples<[low, high],
|
||||
def VGPR_64 : RegisterTuples<[sub0, sub1],
|
||||
[(add VGPR_32),
|
||||
(add (rotl VGPR_32, 1))]>;
|
||||
|
||||
// VGPR 128-bit registers
|
||||
def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
|
||||
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
|
||||
[(add VGPR_32),
|
||||
(add (rotl VGPR_32, 1)),
|
||||
(add (rotl VGPR_32, 2)),
|
||||
|
|
Loading…
Reference in New Issue