forked from OSchip/llvm-project
AMDGPU: Fold fneg into fadd
Patch mostly by Fiona Glaser

llvm-svn: 291731

parent f91e47374c
commit 2529fba989
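The shape this combine targets, written the way the new test below writes it (an fneg expressed as an fsub from -0.0); this snippet is lifted from the v_fneg_add_f32 test, shown here only for orientation:

  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add   ; fneg(%a + %b)

With the fold, the negate is pushed onto the operands of the add, so SI can select a single v_sub_f32 with a negated source modifier (the checks below expect v_sub_f32_e64 ..., -[[A]], [[B]]) rather than materializing the fneg as a separate sign-bit xor, which is what the multi-use tests still expect.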
@@ -477,6 +477,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::FSUB);
  setTargetDAGCombine(ISD::FNEG);
}

//===----------------------------------------------------------------------===//
@@ -2805,6 +2806,63 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
  return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
}

static bool fnegFoldsIntoOp(unsigned Opc) {
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FMAD:
    return true;
  default:
    return false;
  }
}

SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  unsigned Opc = N0.getOpcode();

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  //
  // TODO: Check users can fold
  if (fnegFoldsIntoOp(Opc) && !N0.hasOneUse())
    return SDValue();

  SDLoc SL(N);
  switch (Opc) {
  case ISD::FADD: {
    // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
    SDValue LHS = N0.getOperand(0);
    SDValue RHS = N0.getOperand(1);

    if (LHS.getOpcode() != ISD::FNEG)
      LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
    else
      LHS = LHS.getOperand(0);

    if (RHS.getOpcode() != ISD::FNEG)
      RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
    else
      RHS = RHS.getOperand(0);

    SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS);
    if (!N0.hasOneUse())
      DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
    return Res;
  }
  default:
    return SDValue();
  }
}

SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
@@ -2910,6 +2968,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
    return performMulLoHi24Combine(N, DCI);
  case ISD::SELECT:
    return performSelectCombine(N, DCI);
  case ISD::FNEG:
    return performFNegCombine(N, DCI);
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    assert(!N->getValueType(0).isVector() &&
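A worked sketch of the ISD::FADD case in performFNegCombine above (rewrite notation only, describing DAG nodes rather than IR; not part of the patch): the negate distributes onto the operands, and an operand that is already an fneg is unwrapped instead of being double-negated.

  ; fneg (fadd x, y)        ->  fadd (fneg x), (fneg y)
  ; fneg (fadd (fneg a), b) ->  fadd a, (fneg b)

The per-operand fnegs are then absorbed by VOP source modifiers during selection, which is why the fneg_x and x_fneg tests below expect a plain v_subrev_f32 and the fneg_fneg test expects a plain v_add_f32.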
@@ -84,6 +84,7 @@ protected:
  SDValue performCtlzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                             SDValue RHS, DAGCombinerInfo &DCI) const;
  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
@@ -0,0 +1,179 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s

; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %add, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_A]]
define void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }