R600/SI: fix and cleanup SI register definition v2

Prevent producing real strange tablegen code by using
proper register sizes, alignments and hierarchy.

Also cleanup the unused definitions and add some comments.

v2: add SGPR 512 bit registers, stop registers from wrapping around,
    fix SGPR alignment

This is a candidate for the mesa-stable branch.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
llvm-svn: 176098
This commit is contained in:
Christian Konig 2013-02-26 17:52:03 +00:00
parent d76ed54b60
commit 0f0a8fe2dd
2 changed files with 135 additions and 97 deletions

View File

@ -918,14 +918,15 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
(ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
[(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
[(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
SReg_32:$src0, SReg_32:$src1))]
>;
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
// f32 pattern for S_CSELECT_B32
def : Pat <
(f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
(f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
(S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
>;

View File

@ -1,30 +1,40 @@
//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
let Namespace = "AMDGPU";
let HWEncoding = encoding;
}
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
}
class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
class VGPR_32 <bits<16> num, string name> : SIReg<name, num> {
let HWEncoding{8} = 1;
}
// Special Registers
def VCC : SIReg<"VCC", 106>;
def EXEC_LO : SIReg <"EXEC LO", 126>;
def EXEC_HI : SIReg <"EXEC HI", 127>;
def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
def EXEC : SIReg<"EXEC", 126>;
def SCC : SIReg<"SCC", 253>;
def M0 : SIReg <"M0", 124>;
//Interpolation registers
// SGPR registers
foreach Index = 0-101 in {
def SGPR#Index : SIReg <"SGPR"#Index, Index>;
}
// VGPR registers
foreach Index = 0-255 in {
def VGPR#Index : SIReg <"VGPR"#Index, Index> {
let HWEncoding{8} = 1;
}
}
// virtual Interpolation registers
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
@ -50,102 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">;
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
// SGPR 32-bit registers
foreach Index = 0-101 in {
def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>;
}
//===----------------------------------------------------------------------===//
// Groupings using register classes and tuples
//===----------------------------------------------------------------------===//
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
def SGPR_64 : RegisterTuples<[sub0, sub1],
[(add (decimate SGPR_32, 2)),
(add(decimate (rotl SGPR_32, 1), 2))]>;
[(add (decimate (trunc SGPR_32, 101), 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (decimate SGPR_32, 4)),
(add (decimate (rotl SGPR_32, 1), 4)),
(add (decimate (rotl SGPR_32, 2), 4)),
(add (decimate (rotl SGPR_32, 3), 4))]>;
[(add (decimate (trunc SGPR_32, 99), 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4))]>;
// SGPR 256-bit registers
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
[(add (decimate SGPR_32, 8)),
(add (decimate (rotl SGPR_32, 1), 8)),
(add (decimate (rotl SGPR_32, 2), 8)),
(add (decimate (rotl SGPR_32, 3), 8)),
(add (decimate (rotl SGPR_32, 4), 8)),
(add (decimate (rotl SGPR_32, 5), 8)),
(add (decimate (rotl SGPR_32, 6), 8)),
(add (decimate (rotl SGPR_32, 7), 8))]>;
[(add (decimate (trunc SGPR_32, 95), 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
(add (decimate (shl SGPR_32, 4), 4)),
(add (decimate (shl SGPR_32, 5), 4)),
(add (decimate (shl SGPR_32, 6), 4)),
(add (decimate (shl SGPR_32, 7), 4))]>;
// SGPR 512-bit registers
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
[(add (decimate (trunc SGPR_32, 87), 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
(add (decimate (shl SGPR_32, 4), 4)),
(add (decimate (shl SGPR_32, 5), 4)),
(add (decimate (shl SGPR_32, 6), 4)),
(add (decimate (shl SGPR_32, 7), 4)),
(add (decimate (shl SGPR_32, 8), 4)),
(add (decimate (shl SGPR_32, 9), 4)),
(add (decimate (shl SGPR_32, 10), 4)),
(add (decimate (shl SGPR_32, 11), 4)),
(add (decimate (shl SGPR_32, 12), 4)),
(add (decimate (shl SGPR_32, 13), 4)),
(add (decimate (shl SGPR_32, 14), 4)),
(add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers
foreach Index = 0-255 in {
def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
}
def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
def VGPR_64 : RegisterTuples<[sub0, sub1],
[(add VGPR_32),
(add (rotl VGPR_32, 1))]>;
[(add (trunc VGPR_32, 255)),
(add (shl VGPR_32, 1))]>;
// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add VGPR_32),
(add (rotl VGPR_32, 1)),
(add (rotl VGPR_32, 2)),
(add (rotl VGPR_32, 3))]>;
[(add (trunc VGPR_32, 253)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
(add (shl VGPR_32, 3))]>;
// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
[(add VGPR_32),
(add (rotl VGPR_32, 1)),
(add (rotl VGPR_32, 2)),
(add (rotl VGPR_32, 3)),
(add (rotl VGPR_32, 4)),
(add (rotl VGPR_32, 5)),
(add (rotl VGPR_32, 6)),
(add (rotl VGPR_32, 7))]>;
[(add (trunc VGPR_32, 249)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
(add (shl VGPR_32, 3)),
(add (shl VGPR_32, 4)),
(add (shl VGPR_32, 5)),
(add (shl VGPR_32, 6)),
(add (shl VGPR_32, 7))]>;
// VGPR 512-bit registers
def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
[(add VGPR_32),
(add (rotl VGPR_32, 1)),
(add (rotl VGPR_32, 2)),
(add (rotl VGPR_32, 3)),
(add (rotl VGPR_32, 4)),
(add (rotl VGPR_32, 5)),
(add (rotl VGPR_32, 6)),
(add (rotl VGPR_32, 7)),
(add (rotl VGPR_32, 8)),
(add (rotl VGPR_32, 9)),
(add (rotl VGPR_32, 10)),
(add (rotl VGPR_32, 11)),
(add (rotl VGPR_32, 12)),
(add (rotl VGPR_32, 13)),
(add (rotl VGPR_32, 14)),
(add (rotl VGPR_32, 15))]>;
[(add (trunc VGPR_32, 241)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
(add (shl VGPR_32, 3)),
(add (shl VGPR_32, 4)),
(add (shl VGPR_32, 5)),
(add (shl VGPR_32, 6)),
(add (shl VGPR_32, 7)),
(add (shl VGPR_32, 8)),
(add (shl VGPR_32, 9)),
(add (shl VGPR_32, 10)),
(add (shl VGPR_32, 11)),
(add (shl VGPR_32, 12)),
(add (shl VGPR_32, 13)),
(add (shl VGPR_32, 14)),
(add (shl VGPR_32, 15))]>;
//===----------------------------------------------------------------------===//
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
// Special register classes for predicates and the M0 register
def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add SGPR_32, M0, EXEC_LO, EXEC_HI)
(add SGPR_32, M0Reg)
>;
def SReg_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>;
def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
(add SGPR_64, VCCReg, EXECReg)
>;
def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interploation Registers)
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
(add VGPR_32,
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
// [SV]Src_* register classes, can have either an immediate or an register
//===----------------------------------------------------------------------===//
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add VReg_32, SReg_32,
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
PERSP_CENTER_I, PERSP_CENTER_J,
PERSP_CENTROID_I, PERSP_CENTROID_J,
@ -162,29 +220,8 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
ANCILLARY,
SAMPLE_COVERAGE,
POS_FIXED_PT
)
)
>;
def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
// [SV]Src_* operands can have either an immediate or an register
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64, VReg_64)>;
// Special register classes for predicates and the M0 register
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;