forked from OSchip/llvm-project
AMDGPU: Enable integer division bypass
We probably want this, and I've meant to turn this on for a long time. SC actually emits a special case to early-out for a 1 denominator, which perhaps should also be considered.
This commit is contained in:
parent
ed07c89fc5
commit
4bb0c8f91c
|
@ -37,6 +37,11 @@ using namespace llvm;
|
|||
|
||||
#include "AMDGPUGenCallingConv.inc"
|
||||
|
||||
static cl::opt<bool> AMDGPUBypassSlowDiv(
|
||||
"amdgpu-bypass-slow-div",
|
||||
cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
|
||||
cl::init(true));
|
||||
|
||||
// Find a larger type to do a load / store of a vector with.
|
||||
EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
|
||||
unsigned StoreSize = VT.getStoreSizeInBits();
|
||||
|
@ -482,6 +487,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
|||
MaxStoresPerMemmove = 0xffffffff;
|
||||
MaxStoresPerMemset = 0xffffffff;
|
||||
|
||||
// The expansion for 64-bit division is enormous.
|
||||
if (AMDGPUBypassSlowDiv)
|
||||
addBypassSlowDiv(64, 32);
|
||||
|
||||
setTargetDAGCombine(ISD::BITCAST);
|
||||
setTargetDAGCombine(ISD::SHL);
|
||||
setTargetDAGCombine(ISD::SRA);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare -amdgpu-bypass-slow-div=0 %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: @udiv_i32(
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
|
||||
define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
; GCN-LABEL: s_test_sdiv:
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s
|
||||
;RUN: llc -march=r600 -mcpu=redwood -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=EG %s
|
||||
|
||||
;EG-LABEL: {{^}}s_test_sdiv:
|
||||
;EG: RECIP_UINT
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
|
||||
define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
; GCN-LABEL: s_test_srem:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
|
||||
define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
; GCN-LABEL: s_test_udiv_i64:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
|
||||
|
||||
define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
; GCN-LABEL: s_test_urem_i64:
|
||||
|
|
Loading…
Reference in New Issue