forked from OSchip/llvm-project
R600/SI: fix and cleanup SI register definition v2
Prevent producing really strange tablegen code by using proper register sizes, alignments and hierarchy. Also clean up the unused definitions and add some comments. v2: add SGPR 512 bit registers, stop registers from wrapping around, fix SGPR alignment. This is a candidate for the mesa-stable branch. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176098
This commit is contained in:
parent
d76ed54b60
commit
0f0a8fe2dd
|
@ -918,14 +918,15 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
|
|||
def S_CSELECT_B32 : SOP2 <
|
||||
0x0000000a, (outs SReg_32:$dst),
|
||||
(ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
|
||||
[(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
|
||||
[(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
|
||||
SReg_32:$src0, SReg_32:$src1))]
|
||||
>;
|
||||
|
||||
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
|
||||
|
||||
// f32 pattern for S_CSELECT_B32
|
||||
def : Pat <
|
||||
(f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
|
||||
(f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
|
||||
(S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
|
||||
>;
|
||||
|
||||
|
|
|
@ -1,30 +1,40 @@
|
|||
//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declarations that describe the SI registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
|
||||
let Namespace = "AMDGPU";
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sub0, sub1];
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
|
||||
|
||||
class VGPR_32 <bits<16> num, string name> : SIReg<name, num> {
|
||||
let HWEncoding{8} = 1;
|
||||
}
|
||||
|
||||
// Special Registers
|
||||
def VCC : SIReg<"VCC", 106>;
|
||||
def EXEC_LO : SIReg <"EXEC LO", 126>;
|
||||
def EXEC_HI : SIReg <"EXEC HI", 127>;
|
||||
def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
|
||||
def EXEC : SIReg<"EXEC", 126>;
|
||||
def SCC : SIReg<"SCC", 253>;
|
||||
def M0 : SIReg <"M0", 124>;
|
||||
|
||||
//Interpolation registers
|
||||
// SGPR registers
|
||||
foreach Index = 0-101 in {
|
||||
def SGPR#Index : SIReg <"SGPR"#Index, Index>;
|
||||
}
|
||||
|
||||
// VGPR registers
|
||||
foreach Index = 0-255 in {
|
||||
def VGPR#Index : SIReg <"VGPR"#Index, Index> {
|
||||
let HWEncoding{8} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// virtual Interpolation registers
|
||||
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
|
||||
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
|
||||
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
|
||||
|
@ -50,102 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">;
|
|||
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
|
||||
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
|
||||
|
||||
// SGPR 32-bit registers
|
||||
foreach Index = 0-101 in {
|
||||
def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>;
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Groupings using register classes and tuples
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// SGPR 32-bit registers
|
||||
def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "SGPR%u", 0, 101))>;
|
||||
|
||||
// SGPR 64-bit registers
|
||||
def SGPR_64 : RegisterTuples<[sub0, sub1],
|
||||
[(add (decimate SGPR_32, 2)),
|
||||
(add(decimate (rotl SGPR_32, 1), 2))]>;
|
||||
[(add (decimate (trunc SGPR_32, 101), 2)),
|
||||
(add (decimate (shl SGPR_32, 1), 2))]>;
|
||||
|
||||
// SGPR 128-bit registers
|
||||
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
|
||||
[(add (decimate SGPR_32, 4)),
|
||||
(add (decimate (rotl SGPR_32, 1), 4)),
|
||||
(add (decimate (rotl SGPR_32, 2), 4)),
|
||||
(add (decimate (rotl SGPR_32, 3), 4))]>;
|
||||
[(add (decimate (trunc SGPR_32, 99), 4)),
|
||||
(add (decimate (shl SGPR_32, 1), 4)),
|
||||
(add (decimate (shl SGPR_32, 2), 4)),
|
||||
(add (decimate (shl SGPR_32, 3), 4))]>;
|
||||
|
||||
// SGPR 256-bit registers
|
||||
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
|
||||
[(add (decimate SGPR_32, 8)),
|
||||
(add (decimate (rotl SGPR_32, 1), 8)),
|
||||
(add (decimate (rotl SGPR_32, 2), 8)),
|
||||
(add (decimate (rotl SGPR_32, 3), 8)),
|
||||
(add (decimate (rotl SGPR_32, 4), 8)),
|
||||
(add (decimate (rotl SGPR_32, 5), 8)),
|
||||
(add (decimate (rotl SGPR_32, 6), 8)),
|
||||
(add (decimate (rotl SGPR_32, 7), 8))]>;
|
||||
[(add (decimate (trunc SGPR_32, 95), 4)),
|
||||
(add (decimate (shl SGPR_32, 1), 4)),
|
||||
(add (decimate (shl SGPR_32, 2), 4)),
|
||||
(add (decimate (shl SGPR_32, 3), 4)),
|
||||
(add (decimate (shl SGPR_32, 4), 4)),
|
||||
(add (decimate (shl SGPR_32, 5), 4)),
|
||||
(add (decimate (shl SGPR_32, 6), 4)),
|
||||
(add (decimate (shl SGPR_32, 7), 4))]>;
|
||||
|
||||
// SGPR 512-bit registers
|
||||
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
|
||||
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
|
||||
[(add (decimate (trunc SGPR_32, 87), 4)),
|
||||
(add (decimate (shl SGPR_32, 1), 4)),
|
||||
(add (decimate (shl SGPR_32, 2), 4)),
|
||||
(add (decimate (shl SGPR_32, 3), 4)),
|
||||
(add (decimate (shl SGPR_32, 4), 4)),
|
||||
(add (decimate (shl SGPR_32, 5), 4)),
|
||||
(add (decimate (shl SGPR_32, 6), 4)),
|
||||
(add (decimate (shl SGPR_32, 7), 4)),
|
||||
(add (decimate (shl SGPR_32, 8), 4)),
|
||||
(add (decimate (shl SGPR_32, 9), 4)),
|
||||
(add (decimate (shl SGPR_32, 10), 4)),
|
||||
(add (decimate (shl SGPR_32, 11), 4)),
|
||||
(add (decimate (shl SGPR_32, 12), 4)),
|
||||
(add (decimate (shl SGPR_32, 13), 4)),
|
||||
(add (decimate (shl SGPR_32, 14), 4)),
|
||||
(add (decimate (shl SGPR_32, 15), 4))]>;
|
||||
|
||||
// VGPR 32-bit registers
|
||||
foreach Index = 0-255 in {
|
||||
def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
|
||||
}
|
||||
|
||||
def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "VGPR%u", 0, 255))>;
|
||||
|
||||
// VGPR 64-bit registers
|
||||
def VGPR_64 : RegisterTuples<[sub0, sub1],
|
||||
[(add VGPR_32),
|
||||
(add (rotl VGPR_32, 1))]>;
|
||||
[(add (trunc VGPR_32, 255)),
|
||||
(add (shl VGPR_32, 1))]>;
|
||||
|
||||
// VGPR 128-bit registers
|
||||
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
|
||||
[(add VGPR_32),
|
||||
(add (rotl VGPR_32, 1)),
|
||||
(add (rotl VGPR_32, 2)),
|
||||
(add (rotl VGPR_32, 3))]>;
|
||||
[(add (trunc VGPR_32, 253)),
|
||||
(add (shl VGPR_32, 1)),
|
||||
(add (shl VGPR_32, 2)),
|
||||
(add (shl VGPR_32, 3))]>;
|
||||
|
||||
// VGPR 256-bit registers
|
||||
def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
|
||||
[(add VGPR_32),
|
||||
(add (rotl VGPR_32, 1)),
|
||||
(add (rotl VGPR_32, 2)),
|
||||
(add (rotl VGPR_32, 3)),
|
||||
(add (rotl VGPR_32, 4)),
|
||||
(add (rotl VGPR_32, 5)),
|
||||
(add (rotl VGPR_32, 6)),
|
||||
(add (rotl VGPR_32, 7))]>;
|
||||
[(add (trunc VGPR_32, 249)),
|
||||
(add (shl VGPR_32, 1)),
|
||||
(add (shl VGPR_32, 2)),
|
||||
(add (shl VGPR_32, 3)),
|
||||
(add (shl VGPR_32, 4)),
|
||||
(add (shl VGPR_32, 5)),
|
||||
(add (shl VGPR_32, 6)),
|
||||
(add (shl VGPR_32, 7))]>;
|
||||
|
||||
// VGPR 512-bit registers
|
||||
def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
|
||||
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
|
||||
[(add VGPR_32),
|
||||
(add (rotl VGPR_32, 1)),
|
||||
(add (rotl VGPR_32, 2)),
|
||||
(add (rotl VGPR_32, 3)),
|
||||
(add (rotl VGPR_32, 4)),
|
||||
(add (rotl VGPR_32, 5)),
|
||||
(add (rotl VGPR_32, 6)),
|
||||
(add (rotl VGPR_32, 7)),
|
||||
(add (rotl VGPR_32, 8)),
|
||||
(add (rotl VGPR_32, 9)),
|
||||
(add (rotl VGPR_32, 10)),
|
||||
(add (rotl VGPR_32, 11)),
|
||||
(add (rotl VGPR_32, 12)),
|
||||
(add (rotl VGPR_32, 13)),
|
||||
(add (rotl VGPR_32, 14)),
|
||||
(add (rotl VGPR_32, 15))]>;
|
||||
[(add (trunc VGPR_32, 241)),
|
||||
(add (shl VGPR_32, 1)),
|
||||
(add (shl VGPR_32, 2)),
|
||||
(add (shl VGPR_32, 3)),
|
||||
(add (shl VGPR_32, 4)),
|
||||
(add (shl VGPR_32, 5)),
|
||||
(add (shl VGPR_32, 6)),
|
||||
(add (shl VGPR_32, 7)),
|
||||
(add (shl VGPR_32, 8)),
|
||||
(add (shl VGPR_32, 9)),
|
||||
(add (shl VGPR_32, 10)),
|
||||
(add (shl VGPR_32, 11)),
|
||||
(add (shl VGPR_32, 12)),
|
||||
(add (shl VGPR_32, 13)),
|
||||
(add (shl VGPR_32, 14)),
|
||||
(add (shl VGPR_32, 15))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes used as source and destination
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Special register classes for predicates and the M0 register
|
||||
def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
|
||||
def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
|
||||
def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
|
||||
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
|
||||
|
||||
// Register class for all scalar registers (SGPRs + Special Registers)
|
||||
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add SGPR_32, M0, EXEC_LO, EXEC_HI)
|
||||
(add SGPR_32, M0Reg)
|
||||
>;
|
||||
|
||||
def SReg_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>;
|
||||
def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
|
||||
(add SGPR_64, VCCReg, EXECReg)
|
||||
>;
|
||||
|
||||
def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
|
||||
|
||||
def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
|
||||
|
||||
def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>;
|
||||
|
||||
// Register class for all vector registers (VGPRs + Interpolation Registers)
|
||||
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
|
||||
(add VGPR_32,
|
||||
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
|
||||
|
||||
def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
|
||||
|
||||
def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
|
||||
|
||||
def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
|
||||
|
||||
def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// [SV]Src_* register classes, can have either an immediate or a register
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
|
||||
|
||||
def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
|
||||
|
||||
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
|
||||
(add VReg_32, SReg_32,
|
||||
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
|
||||
PERSP_CENTER_I, PERSP_CENTER_J,
|
||||
PERSP_CENTROID_I, PERSP_CENTROID_J,
|
||||
|
@ -165,26 +223,5 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
|
|||
)
|
||||
>;
|
||||
|
||||
def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
|
||||
|
||||
def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
|
||||
|
||||
def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
|
||||
|
||||
def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
|
||||
|
||||
// [SV]Src_* operands can have either an immediate or a register
|
||||
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
|
||||
|
||||
def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>;
|
||||
|
||||
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
|
||||
|
||||
def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64, VReg_64)>;
|
||||
|
||||
// Special register classes for predicates and the M0 register
|
||||
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
|
||||
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
|
||||
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
|
||||
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
|
||||
def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
|
||||
|
||||
|
|
Loading…
Reference in New Issue