forked from OSchip/llvm-project
AMDGPU: Add flag to disable promotion of uniform i16 ops
This interferes with GlobalISel's much better handling of the situation. This should really be disabled for GlobalISel. However, the fallback only re-runs the selection passes, and doesn't go back and rerun any codegen IR passes. I haven't come up with a good solution to this problem.
This commit is contained in:
parent
2b7a2cbb15
commit
75e6f0b3d4
|
@ -60,6 +60,12 @@ static cl::opt<bool> WidenLoads(
|
|||
cl::ReallyHidden,
|
||||
cl::init(false));
|
||||
|
||||
static cl::opt<bool> Widen16BitOps(
|
||||
"amdgpu-codegenprepare-widen-16-bit-ops",
|
||||
cl::desc("Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
|
||||
cl::ReallyHidden,
|
||||
cl::init(true));
|
||||
|
||||
static cl::opt<bool> UseMul24Intrin(
|
||||
"amdgpu-codegenprepare-mul24",
|
||||
cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
|
||||
|
@ -269,6 +275,9 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
|
|||
}
|
||||
|
||||
bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
|
||||
if (!Widen16BitOps)
|
||||
return false;
|
||||
|
||||
const IntegerType *IntTy = dyn_cast<IntegerType>(T);
|
||||
if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
|
||||
return true;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
||||
; GCN-LABEL: s_andn2_i32:
|
||||
|
@ -196,58 +196,31 @@ define amdgpu_ps <2 x i32> @s_andn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i3
|
|||
}
|
||||
|
||||
define amdgpu_ps i16 @s_andn2_i16(i16 inreg %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: s_andn2_i16:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_andn2_b32 s0, s2, s3
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_andn2_i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s0
|
||||
; GFX9-NEXT: s_xor_b32 s0, s1, s0
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, s0
|
||||
; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_andn2_i16:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_andn2_b32 s0, s2, s3
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%and = and i16 %src0, %not.src1
|
||||
ret i16 %and
|
||||
}
|
||||
|
||||
define amdgpu_ps i16 @s_andn2_i16_commute(i16 inreg %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: s_andn2_i16_commute:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_andn2_b32 s0, s2, s3
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_andn2_i16_commute:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s0
|
||||
; GFX9-NEXT: s_xor_b32 s0, s1, s0
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, s2
|
||||
; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_andn2_i16_commute:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_andn2_b32 s0, s2, s3
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%and = and i16 %not.src1, %src0
|
||||
ret i16 %and
|
||||
}
|
||||
|
||||
define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_use(i16 inreg %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: s_andn2_i16_multi_use:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_xor_b32 s1, s3, -1
|
||||
; GFX6-NEXT: s_andn2_b32 s0, s2, s3
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_andn2_i16_multi_use:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s0
|
||||
; GFX9-NEXT: s_xor_b32 s1, s1, s0
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, s1
|
||||
; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_andn2_i16_multi_use:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_xor_b32 s1, s3, -1
|
||||
; GCN-NEXT: s_andn2_b32 s0, s2, s3
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%and = and i16 %src0, %not.src1
|
||||
%insert.0 = insertvalue { i16, i16 } undef, i16 %and, 0
|
||||
|
@ -256,23 +229,11 @@ define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_use(i16 inreg %src0, i16 inreg
|
|||
}
|
||||
|
||||
define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) {
|
||||
; GFX6-LABEL: s_andn2_i16_multi_foldable_use:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_andn2_b32 s0, s2, s4
|
||||
; GFX6-NEXT: s_andn2_b32 s1, s3, s4
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_andn2_i16_multi_foldable_use:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s1, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s0, s4, s1
|
||||
; GFX9-NEXT: s_xor_b32 s0, s0, s1
|
||||
; GFX9-NEXT: s_and_b32 s2, s2, s1
|
||||
; GFX9-NEXT: s_and_b32 s4, s0, s1
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s1
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, s4
|
||||
; GFX9-NEXT: s_and_b32 s1, s1, s4
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_andn2_i16_multi_foldable_use:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_andn2_b32 s0, s2, s4
|
||||
; GCN-NEXT: s_andn2_b32 s1, s3, s4
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src2 = xor i16 %src2, -1
|
||||
%and0 = and i16 %src0, %not.src2
|
||||
%and1 = and i16 %src1, %not.src2
|
||||
|
@ -308,21 +269,12 @@ define amdgpu_ps float @v_andn2_i16_sv(i16 inreg %src0, i16 %src1) {
|
|||
}
|
||||
|
||||
define amdgpu_ps float @v_andn2_i16_vs(i16 %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: v_andn2_i16_vs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_xor_b32 s0, s2, -1
|
||||
; GFX6-NEXT: v_and_b32_e32 v0, s0, v0
|
||||
; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: v_andn2_i16_vs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s2, s0
|
||||
; GFX9-NEXT: s_xor_b32 s0, s1, s0
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, s0, v0
|
||||
; GFX9-NEXT: v_bfe_u32 v0, v0, 0, 16
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: v_andn2_i16_vs:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_xor_b32 s0, s2, -1
|
||||
; GCN-NEXT: v_and_b32_e32 v0, s0, v0
|
||||
; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%and = and i16 %src0, %not.src1
|
||||
%zext = zext i16 %and to i32
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
||||
; GCN-LABEL: s_orn2_i32:
|
||||
|
@ -196,58 +196,31 @@ define amdgpu_ps <2 x i32> @s_orn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i32
|
|||
}
|
||||
|
||||
define amdgpu_ps i16 @s_orn2_i16(i16 inreg %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: s_orn2_i16:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_orn2_b32 s0, s2, s3
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_orn2_i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s0
|
||||
; GFX9-NEXT: s_xor_b32 s0, s1, s0
|
||||
; GFX9-NEXT: s_or_b32 s0, s2, s0
|
||||
; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_orn2_i16:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_orn2_b32 s0, s2, s3
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%or = or i16 %src0, %not.src1
|
||||
ret i16 %or
|
||||
}
|
||||
|
||||
define amdgpu_ps i16 @s_orn2_i16_commute(i16 inreg %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: s_orn2_i16_commute:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_orn2_b32 s0, s2, s3
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_orn2_i16_commute:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s0
|
||||
; GFX9-NEXT: s_xor_b32 s0, s1, s0
|
||||
; GFX9-NEXT: s_or_b32 s0, s0, s2
|
||||
; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_orn2_i16_commute:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_orn2_b32 s0, s2, s3
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%or = or i16 %not.src1, %src0
|
||||
ret i16 %or
|
||||
}
|
||||
|
||||
define amdgpu_ps { i16, i16 } @s_orn2_i16_multi_use(i16 inreg %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: s_orn2_i16_multi_use:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_xor_b32 s1, s3, -1
|
||||
; GFX6-NEXT: s_orn2_b32 s0, s2, s3
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_orn2_i16_multi_use:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s0
|
||||
; GFX9-NEXT: s_xor_b32 s1, s1, s0
|
||||
; GFX9-NEXT: s_or_b32 s0, s2, s1
|
||||
; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_orn2_i16_multi_use:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_xor_b32 s1, s3, -1
|
||||
; GCN-NEXT: s_orn2_b32 s0, s2, s3
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%or = or i16 %src0, %not.src1
|
||||
%insert.0 = insertvalue { i16, i16 } undef, i16 %or, 0
|
||||
|
@ -256,23 +229,11 @@ define amdgpu_ps { i16, i16 } @s_orn2_i16_multi_use(i16 inreg %src0, i16 inreg %
|
|||
}
|
||||
|
||||
define amdgpu_ps { i16, i16 } @s_orn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) {
|
||||
; GFX6-LABEL: s_orn2_i16_multi_foldable_use:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_orn2_b32 s0, s2, s4
|
||||
; GFX6-NEXT: s_orn2_b32 s1, s3, s4
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_orn2_i16_multi_foldable_use:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s1, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s0, s4, s1
|
||||
; GFX9-NEXT: s_xor_b32 s0, s0, s1
|
||||
; GFX9-NEXT: s_and_b32 s2, s2, s1
|
||||
; GFX9-NEXT: s_and_b32 s4, s0, s1
|
||||
; GFX9-NEXT: s_and_b32 s1, s3, s1
|
||||
; GFX9-NEXT: s_or_b32 s0, s2, s4
|
||||
; GFX9-NEXT: s_or_b32 s1, s1, s4
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: s_orn2_i16_multi_foldable_use:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_orn2_b32 s0, s2, s4
|
||||
; GCN-NEXT: s_orn2_b32 s1, s3, s4
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src2 = xor i16 %src2, -1
|
||||
%or0 = or i16 %src0, %not.src2
|
||||
%or1 = or i16 %src1, %not.src2
|
||||
|
@ -308,21 +269,12 @@ define amdgpu_ps float @v_orn2_i16_sv(i16 inreg %src0, i16 %src1) {
|
|||
}
|
||||
|
||||
define amdgpu_ps float @v_orn2_i16_vs(i16 %src0, i16 inreg %src1) {
|
||||
; GFX6-LABEL: v_orn2_i16_vs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_xor_b32 s0, s2, -1
|
||||
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
|
||||
; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: v_orn2_i16_vs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0xffff
|
||||
; GFX9-NEXT: s_and_b32 s1, s2, s0
|
||||
; GFX9-NEXT: s_xor_b32 s0, s1, s0
|
||||
; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
|
||||
; GFX9-NEXT: v_bfe_u32 v0, v0, 0, 16
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
; GCN-LABEL: v_orn2_i16_vs:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_xor_b32 s0, s2, -1
|
||||
; GCN-NEXT: v_or_b32_e32 v0, s0, v0
|
||||
; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
%not.src1 = xor i16 %src1, -1
|
||||
%or = or i16 %src0, %not.src1
|
||||
%zext = zext i16 %or to i32
|
||||
|
|
Loading…
Reference in New Issue