From e8e6817d00a4dd1ba563a96aec2fd2be8f35dea9 Mon Sep 17 00:00:00 2001
From: Ruiling Song
Date: Tue, 9 Mar 2021 22:05:21 +0800
Subject: [PATCH] [AMDGPU] Don't check hasStackObjects() when reserving VGPR

We have amdgpu_gfx functions that have high register pressure. If we do
not reserve a VGPR for SGPR spills, we fall into the path that spills
SGPRs to memory, which not only has correctness issues but also performs
really badly. I don't know why the hasStackObjects() check is there; in
our case, we don't have stack objects at the time of finalizeLowering().
So just remove the check, so that we always reserve a VGPR for possible
SGPR spills in non-entry functions.

Reviewed by: arsenm

Differential Revision: https://reviews.llvm.org/D98345
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  3 +-
 .../AMDGPU/reserve-vgpr-for-sgpr-spill.ll     | 47 +++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 516d272deab3..3f1549ab7931 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11649,8 +11649,7 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
   // "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
   // FIXME: We won't need this hack if we split SGPR allocation from VGPR
   if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
-      !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction() &&
-      MF.getFrameInfo().hasStackObjects())
+      !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction())
     Info->reserveVGPRforSGPRSpills(MF);
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
index 7ba977071936..c0433d25b501 100644
--- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
@@ -187,4 +187,51 @@ define void @reserve_vgpr_with_tail_call() #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}reserve_vgpr_for_sgpr_spills_no_alloca:
+; GCN: v_writelane_b32 v5, s34, 0
+; GCN: v_writelane_b32 v5, s35, 1
+; GCN: v_writelane_b32 v5, s36, 2
+; GCN: v_writelane_b32 v5, s37, 3
+; GCN: v_readlane_b32 s37, v5, 3
+; GCN: v_readlane_b32 s36, v5, 2
+; GCN: v_readlane_b32 s35, v5, 1
+; GCN: v_readlane_b32 s34, v5, 0
+
+define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  call void asm sideeffect "",
+  "~{v6},~{v7},~{v8},~{v9}
+  ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+  ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+  ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+  ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+  ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+  ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+  ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+  ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+  ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+  ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+  ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+  ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+  ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+  ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+  ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+  ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+  ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+  ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+  ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+  ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+  ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+  ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+  ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+  ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+  ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+
+  call void asm sideeffect "",
+  "~{s34},~{s35},~{s36},~{s37}" () #0
+
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }