[AMDGPU] Reserving VGPR for future SGPR Spill
Summary: One VGPR register is allocated to handle a future spill of SGPR if "--amdgpu-reserve-vgpr-for-sgpr-spill" option is used
Reviewers: arsenm, rampitec, msearles, cdevadas
Reviewed By: arsenm
Subscribers: madhur13490, qcolombet, kerbowa, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #amdgpu, #llvm
Differential Revision: https://reviews.llvm.org/D70379
2020-04-10 15:55:11 +08:00
|
|
|
; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx803 -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
|
|
|
|
|
|
|
; Callee for the call sites in the other functions of this test.
; Its body is one empty inline asm statement that lists vcc as clobbered,
; followed by a plain return.
define void @child_function() #0 {
|
|
|
|
; Empty asm template; the only declared effect is the vcc clobber.
call void asm sideeffect "", "~{vcc}" () #0
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-07-14 06:38:29 +08:00
|
|
|
; GCN-LABEL: {{^}}reserve_vgpr_with_no_lower_vgpr_available:
|
|
|
|
; GCN: buffer_store_dword v255, off, s[0:3], s32
|
|
|
|
; GCN: v_writelane_b32 v255, s33, 2
|
|
|
|
; GCN: v_writelane_b32 v255, s30, 0
|
|
|
|
; GCN: v_writelane_b32 v255, s31, 1
|
|
|
|
; GCN: s_swappc_b64 s[30:31], s[4:5]
|
|
|
|
; GCN: v_readlane_b32 s4, v255, 0
|
|
|
|
; GCN: v_readlane_b32 s5, v255, 1
|
|
|
|
; GCN: v_readlane_b32 s33, v255, 2
|
[AMDGPU] Reserving VGPR for future SGPR Spill
Summary: One VGPR register is allocated to handle a future spill of SGPR if "--amdgpu-reserve-vgpr-for-sgpr-spill" option is used
Reviewers: arsenm, rampitec, msearles, cdevadas
Reviewed By: arsenm
Subscribers: madhur13490, qcolombet, kerbowa, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #amdgpu, #llvm
Differential Revision: https://reviews.llvm.org/D70379
2020-04-10 15:55:11 +08:00
|
|
|
; GCN: ; NumVgprs: 256
|
|
|
|
|
2020-07-14 06:38:29 +08:00
|
|
|
; The inline asm below names v0 through v254 as clobbered, so v255 is the only
; VGPR that is not in the clobber list. The FileCheck lines that precede this
; function expect v255 to be stored to the stack and to receive s33, s30 and
; s31 through v_writelane_b32 around the call to child_function.
define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
|
|
|
|
; Volatile store of 0 to an i32 stack slot in addrspace(5).
; NOTE(review): presumably here to force a stack object - confirm.
%alloca = alloca i32, align 4, addrspace(5)
|
|
|
|
store volatile i32 0, i32 addrspace(5)* %alloca
|
|
|
|
|
[AMDGPU] Reserving VGPR for future SGPR Spill
Summary: One VGPR register is allocated to handle a future spill of SGPR if "--amdgpu-reserve-vgpr-for-sgpr-spill" option is used
Reviewers: arsenm, rampitec, msearles, cdevadas
Reviewed By: arsenm
Subscribers: madhur13490, qcolombet, kerbowa, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #amdgpu, #llvm
Differential Revision: https://reviews.llvm.org/D70379
2020-04-10 15:55:11 +08:00
|
|
|
; Empty asm template whose clobber list covers v0 through v254.
call void asm sideeffect "",
|
|
|
|
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
|
|
|
|
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
|
|
|
|
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
|
|
|
|
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
|
|
|
|
,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
|
|
|
|
,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
|
|
|
|
,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
|
|
|
|
,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
|
|
|
|
,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
|
|
|
|
,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
|
|
|
|
,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
|
|
|
|
,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
|
|
|
|
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
|
|
|
|
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
|
|
|
|
,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
|
|
|
|
,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
|
|
|
|
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
|
|
|
|
,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
|
|
|
|
,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
|
|
|
|
,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
|
|
|
|
,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
|
|
|
|
,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
|
|
|
|
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
|
|
|
|
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
|
|
|
|
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
|
|
|
|
,~{v250},~{v251},~{v252},~{v253},~{v254}" () #0
|
|
|
|
; Ordinary (non-tail) call; the expected output above contains s_swappc_b64.
call void @child_function()
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-07-14 06:38:29 +08:00
|
|
|
; GCN-LABEL: {{^}}reserve_lowest_available_vgpr:
|
|
|
|
; GCN: buffer_store_dword v254, off, s[0:3], s32
|
|
|
|
; GCN: v_writelane_b32 v254, s33, 2
|
|
|
|
; GCN: v_writelane_b32 v254, s30, 0
|
|
|
|
; GCN: v_writelane_b32 v254, s31, 1
|
|
|
|
; GCN: s_swappc_b64 s[30:31], s[4:5]
|
|
|
|
; GCN: v_readlane_b32 s4, v254, 0
|
|
|
|
; GCN: v_readlane_b32 s5, v254, 1
|
|
|
|
; GCN: v_readlane_b32 s33, v254, 2
|
|
|
|
|
|
|
|
; The inline asm below names v0 through v253 as clobbered, which leaves v254
; and v255 out of the clobber list. The FileCheck lines that precede this
; function expect v254 (the lower of the two) to be stored to the stack and to
; receive s33, s30 and s31 through v_writelane_b32 around the call.
define void @reserve_lowest_available_vgpr() #0 {
|
|
|
|
; Volatile store of 0 to an i32 stack slot in addrspace(5).
; NOTE(review): presumably here to force a stack object - confirm.
%alloca = alloca i32, align 4, addrspace(5)
|
|
|
|
store volatile i32 0, i32 addrspace(5)* %alloca
|
|
|
|
|
|
|
|
; Empty asm template whose clobber list covers v0 through v253.
call void asm sideeffect "",
|
|
|
|
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
|
|
|
|
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
|
|
|
|
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
|
|
|
|
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
|
|
|
|
,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
|
|
|
|
,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
|
|
|
|
,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
|
|
|
|
,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
|
|
|
|
,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
|
|
|
|
,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
|
|
|
|
,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
|
|
|
|
,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
|
|
|
|
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
|
|
|
|
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
|
|
|
|
,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
|
|
|
|
,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
|
|
|
|
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
|
|
|
|
,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
|
|
|
|
,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
|
|
|
|
,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
|
|
|
|
,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
|
|
|
|
,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
|
|
|
|
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
|
|
|
|
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
|
|
|
|
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
|
|
|
|
,~{v250},~{v251},~{v252},~{v253}" () #0
|
|
|
|
; Ordinary (non-tail) call; the expected output above contains s_swappc_b64.
call void @child_function()
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}reserve_vgpr_with_sgpr_spills:
|
|
|
|
; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32
|
|
|
|
; GCN: ; def s4
|
|
|
|
; GCN: v_writelane_b32 v254, s4, 2
|
|
|
|
; GCN: v_readlane_b32 s4, v254, 2
|
|
|
|
; GCN: ; use s4
|
|
|
|
|
|
|
|
; The inline asm below names v0 through v253 as clobbered. An SGPR value is
; then defined by one inline asm and consumed by another in a different basic
; block. The FileCheck lines that precede this function expect that value (s4)
; to be written to lane 2 of v254 and read back before its use, and expect no
; buffer_store_dword of v255 to s32.
define void @reserve_vgpr_with_sgpr_spills() #0 {
|
|
|
|
; Volatile store of 0 to an i32 stack slot in addrspace(5).
; NOTE(review): presumably here to force a stack object - confirm.
%alloca = alloca i32, align 4, addrspace(5)
|
|
|
|
store volatile i32 0, i32 addrspace(5)* %alloca
|
|
|
|
|
|
|
|
; Empty asm template whose clobber list covers v0 through v253.
call void asm sideeffect "",
|
|
|
|
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
|
|
|
|
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
|
|
|
|
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
|
|
|
|
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
|
|
|
|
,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
|
|
|
|
,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
|
|
|
|
,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
|
|
|
|
,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
|
|
|
|
,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
|
|
|
|
,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
|
|
|
|
,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
|
|
|
|
,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
|
|
|
|
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
|
|
|
|
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
|
|
|
|
,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
|
|
|
|
,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
|
|
|
|
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
|
|
|
|
,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
|
|
|
|
,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
|
|
|
|
,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
|
|
|
|
,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
|
|
|
|
,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
|
|
|
|
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
|
|
|
|
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
|
|
|
|
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
|
|
|
|
,~{v250},~{v251},~{v252},~{v253}" () #0
|
|
|
|
|
|
|
|
; Produce an i32 in an SGPR ("=s" output constraint); the asm text is only the
; marker comment "; def $0".
%sgpr = call i32 asm sideeffect "; def $0", "=s" () #0
|
|
|
|
; NOTE(review): the compared operand is undef, so the IR does not determine
; which successor is taken. Presumably the branch exists only to keep %sgpr
; live across a block boundary - confirm.
%cmp = icmp eq i32 undef, 0
|
|
|
|
br i1 %cmp, label %bb0, label %ret
|
|
|
|
|
|
|
|
bb0:
|
|
|
|
; Consume %sgpr through an "s" input constraint; the asm text is only the
; marker comment "; use $0".
call void asm sideeffect "; use $0", "s"(i32 %sgpr) #0
|
|
|
|
br label %ret
|
|
|
|
|
|
|
|
ret:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}reserve_vgpr_with_tail_call:
|
|
|
|
; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32
|
|
|
|
; GCN-NOT: v_writelane
|
|
|
|
; GCN: s_setpc_b64 s[4:5]
|
|
|
|
|
|
|
|
; The inline asm below names v0 through v254 as clobbered, and the function
; ends in a musttail call to child_function. The FileCheck lines that precede
; this function expect no buffer_store_dword of v255 to s32, no v_writelane,
; and an s_setpc_b64 s[4:5].
define void @reserve_vgpr_with_tail_call() #0 {
|
|
|
|
; Volatile store of 0 to an i32 stack slot in addrspace(5).
; NOTE(review): presumably here to force a stack object - confirm.
%alloca = alloca i32, align 4, addrspace(5)
|
|
|
|
store volatile i32 0, i32 addrspace(5)* %alloca
|
|
|
|
|
|
|
|
; Empty asm template whose clobber list covers v0 through v254.
call void asm sideeffect "",
|
|
|
|
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
|
|
|
|
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
|
|
|
|
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
|
|
|
|
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
|
|
|
|
,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
|
|
|
|
,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
|
|
|
|
,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
|
|
|
|
,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
|
|
|
|
,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
|
|
|
|
,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
|
|
|
|
,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
|
|
|
|
,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
|
|
|
|
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
|
|
|
|
,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
|
|
|
|
,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
|
|
|
|
,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
|
|
|
|
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
|
|
|
|
,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
|
|
|
|
,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
|
|
|
|
,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
|
|
|
|
,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
|
|
|
|
,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
|
|
|
|
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
|
|
|
|
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
|
|
|
|
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
|
|
|
|
,~{v250},~{v251},~{v252},~{v253},~{v254}" () #0
|
|
|
|
; musttail call immediately followed by ret void.
musttail call void @child_function()
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
[AMDGPU] Control num waves per EU for implicit work-group size
Summary:
If amdgpu-flat-work-group-size is not specified in LLVM IR, the backend
uses a default value of 1024. For this, the minimum waves per EU should be 4.
However, the backend still sets the minimum value to 1 instead of the calculated
value. This is not normally observed because the frontend always provides the
amdgpu-flat-work-group-size attribute.
Reviewers: rampitec, b-sumner, sameerds, msearles
Reviewed By: rampitec
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81991
2020-06-17 12:06:48 +08:00
|
|
|
; Attribute group #0, referenced by the function definitions and inline asm
; call sites above: nounwind, noinline, norecurse, and the string attribute
; "amdgpu-flat-work-group-size" set to "1,256".
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }
|