diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 6037dcc0e793..8bd7de7269b7 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -374,86 +374,6 @@ defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">; defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">; } -//===----------------------------------------------------------------------===// -// Scalar Memory Patterns -//===----------------------------------------------------------------------===// - - -def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>; - -def SMRDImm : ComplexPattern; -def SMRDImm32 : ComplexPattern; -def SMRDSgpr : ComplexPattern; -def SMRDBufferImm : ComplexPattern; -def SMRDBufferImm32 : ComplexPattern; - -multiclass SMRD_Pattern { - - // 1. IMM offset - def : GCNPat < - (smrd_load (SMRDImm i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_IMM") $sbase, $offset, 0)) - >; - - // 2. SGPR offset - def : GCNPat < - (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0)) - >; -} - -multiclass SMLoad_Pattern { - // 1. Offset as an immediate - // name this pattern to reuse AddedComplexity on CI - def _IMM : GCNPat < - (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc), - (vt (!cast(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc))) - >; - - // 2. Offset loaded in an 32bit SGPR - def : GCNPat < - (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc), - (vt (!cast(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc))) - >; -} - - -let OtherPredicates = [isSICI] in { -def : GCNPat < - (i64 (readcyclecounter)), - (S_MEMTIME) ->; -} - -// Global and constant loads can be selected to either MUBUF or SMRD -// instructions, but SMRD instructions are faster so we want the instruction -// selector to prefer those. -let AddedComplexity = 100 in { - -defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; - -// Name the pattern to reuse AddedComplexity on CI -defm SM_LOAD_PATTERN : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>; -defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>; -defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>; -defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>; -defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>; -} // End let AddedComplexity = 100 - -let OtherPredicates = [isVI] in { - -def : GCNPat < - (i64 (readcyclecounter)), - (S_MEMREALTIME) ->; - -} // let OtherPredicates = [isVI] - - //===----------------------------------------------------------------------===// // Targets //===----------------------------------------------------------------------===// @@ -760,31 +680,91 @@ class SMRD_Real_ci op, SM_Pseudo ps> def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>; -let AddedComplexity = SM_LOAD_PATTERN_IMM.AddedComplexity in { +//===----------------------------------------------------------------------===// +// Scalar Memory Patterns +//===----------------------------------------------------------------------===// -class SMRD_Pattern_ci : GCNPat < - (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> { - let OtherPredicates = [isCIOnly]; +def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>; + +def SMRDImm : ComplexPattern; +def SMRDImm32 : ComplexPattern; +def SMRDSgpr : ComplexPattern; +def SMRDBufferImm : ComplexPattern; +def SMRDBufferImm32 : ComplexPattern; + +multiclass SMRD_Pattern { + + // 1. IMM offset + def : GCNPat < + (smrd_load (SMRDImm i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_IMM") $sbase, $offset, 0)) + >; + + // 2. 32-bit IMM offset on CI + def : GCNPat < + (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> { + let OtherPredicates = [isCIOnly]; + } + + // 3. SGPR offset + def : GCNPat < + (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0)) + >; } -def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>; +multiclass SMLoad_Pattern { + // 1. Offset as an immediate + def : GCNPat < + (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc), + (vt (!cast(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc))) + >; -class SMLoad_Pattern_ci : GCNPat < - (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)), - (!cast(Instr) $sbase, $offset, (as_i1imm $glc))> { - let OtherPredicates = [isCIOnly]; + // 2. 32-bit IMM offset on CI + def : GCNPat < + (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)), + (!cast(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> { + let OtherPredicates = [isCIOnly]; + } + + // 3. Offset loaded in an 32bit SGPR + def : GCNPat < + (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc), + (vt (!cast(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc))) + >; } -def : SMLoad_Pattern_ci <"S_BUFFER_LOAD_DWORD_IMM_ci", i32>; -def : SMLoad_Pattern_ci <"S_BUFFER_LOAD_DWORDX2_IMM_ci", v2i32>; -def : SMLoad_Pattern_ci <"S_BUFFER_LOAD_DWORDX4_IMM_ci", v4i32>; -def : SMLoad_Pattern_ci <"S_BUFFER_LOAD_DWORDX8_IMM_ci", v8i32>; -def : SMLoad_Pattern_ci <"S_BUFFER_LOAD_DWORDX16_IMM_ci", v16i32>; +// Global and constant loads can be selected to either MUBUF or SMRD +// instructions, but SMRD instructions are faster so we want the instruction +// selector to prefer those. +let AddedComplexity = 100 in { -} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity +defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; +defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>; +defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>; +defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>; +defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>; +defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>; +} // End let AddedComplexity = 100 + +let OtherPredicates = [isSICI] in { +def : GCNPat < + (i64 (readcyclecounter)), + (S_MEMTIME) +>; +} + +let OtherPredicates = [isVI] in { + +def : GCNPat < + (i64 (readcyclecounter)), + (S_MEMREALTIME) +>; + +} // let OtherPredicates = [isVI]