forked from OSchip/llvm-project
[AMDGPU] Don't check hasStackObjects() when reserving VGPR
We have amdgpu_gfx functions that have high register pressure. If we do not reserve a VGPR for SGPR spills, we will fall into the path that spills SGPRs to memory, which not only has correctness issues but also has really bad performance. I don't know why there is a check for hasStackObjects(); in our case, we don't have stack objects at the time of finalizeLowering(). So just remove the check, so that we always reserve a VGPR for possible SGPR spills in non-entry functions. Reviewed by: arsenm Differential Revision: https://reviews.llvm.org/D98345
This commit is contained in:
parent
4cee5cad28
commit
e8e6817d00
|
@ -11649,8 +11649,7 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
|
||||||
// "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
|
// "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
|
||||||
// FIXME: We won't need this hack if we split SGPR allocation from VGPR
|
// FIXME: We won't need this hack if we split SGPR allocation from VGPR
|
||||||
if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
|
if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
|
||||||
!Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction() &&
|
!Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction())
|
||||||
MF.getFrameInfo().hasStackObjects())
|
|
||||||
Info->reserveVGPRforSGPRSpills(MF);
|
Info->reserveVGPRforSGPRSpills(MF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -187,4 +187,51 @@ define void @reserve_vgpr_with_tail_call() #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}reserve_vgpr_for_sgpr_spills_no_alloca:
|
||||||
|
; GCN: v_writelane_b32 v5, s34, 0
|
||||||
|
; GCN: v_writelane_b32 v5, s35, 1
|
||||||
|
; GCN: v_writelane_b32 v5, s36, 2
|
||||||
|
; GCN: v_writelane_b32 v5, s37, 3
|
||||||
|
; GCN: v_readlane_b32 s37, v5, 3
|
||||||
|
; GCN: v_readlane_b32 s36, v5, 2
|
||||||
|
; GCN: v_readlane_b32 s35, v5, 1
|
||||||
|
; GCN: v_readlane_b32 s34, v5, 0
|
||||||
|
|
||||||
|
define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
|
||||||
|
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
|
||||||
|
call void asm sideeffect "",
|
||||||
|
"~{v6},~{v7},~{v8},~{v9}
|
||||||
|
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
|
||||||
|
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
|
||||||
|
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
|
||||||
|
,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
|
||||||
|
,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
|
||||||
|
,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
|
||||||
|
,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
|
||||||
|
,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
|
||||||
|
,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
|
||||||
|
,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
|
||||||
|
,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
|
||||||
|
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
|
||||||
|
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
|
||||||
|
,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
|
||||||
|
,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
|
||||||
|
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
|
||||||
|
,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
|
||||||
|
,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
|
||||||
|
,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
|
||||||
|
,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
|
||||||
|
,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
|
||||||
|
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
|
||||||
|
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
|
||||||
|
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
|
||||||
|
,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
|
||||||
|
|
||||||
|
call void asm sideeffect "",
|
||||||
|
"~{s34},~{s35},~{s36},~{s37}" () #0
|
||||||
|
|
||||||
|
store <4 x i32> %a, <4 x i32> addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }
|
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }
|
||||||
|
|
Loading…
Reference in New Issue