forked from OSchip/llvm-project
[AMDGPU] gfx10 wave32 patterns
Differential Revision: https://reviews.llvm.org/D63511 llvm-svn: 363729
This commit is contained in:
parent
acc93d62e0
commit
bb1c8b6f5c
|
@ -167,7 +167,6 @@ def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
|
|||
def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
>;
|
||||
|
||||
} // End usesCustomInserter = 1, Defs = [SCC]
|
||||
|
||||
let usesCustomInserter = 1 in {
|
||||
|
@ -563,7 +562,16 @@ def : GCNPat <
|
|||
def : GCNPat <
|
||||
(AMDGPUinit_exec i64:$src),
|
||||
(SI_INIT_EXEC (as_i64imm $src))
|
||||
>;
|
||||
> {
|
||||
let WaveSizePredicate = isWave64;
|
||||
}
|
||||
|
||||
def : GCNPat <
|
||||
(AMDGPUinit_exec i64:$src),
|
||||
(SI_INIT_EXEC_LO (as_i32imm $src))
|
||||
> {
|
||||
let WaveSizePredicate = isWave32;
|
||||
}
|
||||
|
||||
def : GCNPat <
|
||||
(AMDGPUinit_exec_from_input i32:$input, i32:$shift),
|
||||
|
@ -1170,7 +1178,16 @@ def : GCNPat <
|
|||
def : GCNPat <
|
||||
(i1 imm:$imm),
|
||||
(S_MOV_B64 (i64 (as_i64imm $imm)))
|
||||
>;
|
||||
> {
|
||||
let WaveSizePredicate = isWave64;
|
||||
}
|
||||
|
||||
def : GCNPat <
|
||||
(i1 imm:$imm),
|
||||
(S_MOV_B32 (i32 (as_i32imm $imm)))
|
||||
> {
|
||||
let WaveSizePredicate = isWave32;
|
||||
}
|
||||
|
||||
def : GCNPat <
|
||||
(f64 InlineFPImm<f64>:$imm),
|
||||
|
@ -1361,10 +1378,12 @@ def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>;
|
|||
|
||||
// If we need to perform a logical operation on i1 values, we need to
|
||||
// use vector comparisons since there is only one SCC register. Vector
|
||||
// comparisons still write to a pair of SGPRs, so treat these as
|
||||
// 64-bit comparisons. When legalizing SGPR copies, instructions
|
||||
// resulting in the copies from SCC to these instructions will be
|
||||
// moved to the VALU.
|
||||
// comparisons may write to a pair of SGPRs or a single SGPR, so treat
|
||||
// these as 32 or 64-bit comparisons. When legalizing SGPR copies,
|
||||
// instructions resulting in the copies from SCC to these instructions
|
||||
// will be moved to the VALU.
|
||||
|
||||
let WaveSizePredicate = isWave64 in {
|
||||
def : GCNPat <
|
||||
(i1 (and i1:$src0, i1:$src1)),
|
||||
(S_AND_B64 $src0, $src1)
|
||||
|
@ -1401,6 +1420,46 @@ def : GCNPat <
|
|||
(S_NOT_B64 $src0)
|
||||
>;
|
||||
}
|
||||
} // end isWave64
|
||||
|
||||
let WaveSizePredicate = isWave32 in {
|
||||
def : GCNPat <
|
||||
(i1 (and i1:$src0, i1:$src1)),
|
||||
(S_AND_B32 $src0, $src1)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i1 (or i1:$src0, i1:$src1)),
|
||||
(S_OR_B32 $src0, $src1)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i1 (xor i1:$src0, i1:$src1)),
|
||||
(S_XOR_B32 $src0, $src1)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i1 (add i1:$src0, i1:$src1)),
|
||||
(S_XOR_B32 $src0, $src1)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i1 (sub i1:$src0, i1:$src1)),
|
||||
(S_XOR_B32 $src0, $src1)
|
||||
>;
|
||||
|
||||
let AddedComplexity = 1 in {
|
||||
def : GCNPat <
|
||||
(i1 (add i1:$src0, (i1 -1))),
|
||||
(S_NOT_B32 $src0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i1 (sub i1:$src0, (i1 -1))),
|
||||
(S_NOT_B32 $src0)
|
||||
>;
|
||||
}
|
||||
} // end isWave32
|
||||
|
||||
def : GCNPat <
|
||||
(f16 (sint_to_fp i1:$src)),
|
||||
|
|
|
@ -454,7 +454,7 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
|
|||
|
||||
// Subset of SReg_32 without M0 for SMRD instructions and alike.
|
||||
// See comments in SIInstructions.td for more info.
|
||||
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
|
||||
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
|
||||
SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
|
||||
SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
|
||||
|
@ -462,23 +462,23 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
|
|||
let AllocationPriority = 8;
|
||||
}
|
||||
|
||||
def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
|
||||
(add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
|
||||
let AllocationPriority = 8;
|
||||
}
|
||||
|
||||
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
|
||||
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
|
||||
let AllocationPriority = 8;
|
||||
}
|
||||
|
||||
// Register class for all scalar registers (SGPRs + Special Registers)
|
||||
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
|
||||
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
|
||||
let AllocationPriority = 8;
|
||||
}
|
||||
|
||||
def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
|
||||
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
|
|
@ -152,12 +152,24 @@ let Defs = [SCC] in {
|
|||
[(set i64:$sdst, (not i64:$src0))]
|
||||
>;
|
||||
def S_WQM_B32 : SOP1_32 <"s_wqm_b32">;
|
||||
def S_WQM_B64 : SOP1_64 <"s_wqm_b64",
|
||||
[(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))]
|
||||
>;
|
||||
def S_WQM_B64 : SOP1_64 <"s_wqm_b64">;
|
||||
} // End Defs = [SCC]
|
||||
|
||||
|
||||
let WaveSizePredicate = isWave32 in {
|
||||
def : GCNPat <
|
||||
(int_amdgcn_wqm_vote i1:$src0),
|
||||
(S_WQM_B32 $src0)
|
||||
>;
|
||||
}
|
||||
|
||||
let WaveSizePredicate = isWave64 in {
|
||||
def : GCNPat <
|
||||
(int_amdgcn_wqm_vote i1:$src0),
|
||||
(S_WQM_B64 $src0)
|
||||
>;
|
||||
}
|
||||
|
||||
def S_BREV_B32 : SOP1_32 <"s_brev_b32",
|
||||
[(set i32:$sdst, (bitreverse i32:$src0))]
|
||||
>;
|
||||
|
|
Loading…
Reference in New Issue