forked from OSchip/llvm-project
[AArch64][GlobalISel] Add a new reassociation for G_PTR_ADDs.
G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)

Improves CTMark -Os on AArch64:

Program             before    after     diff
sqlite3             286932    287024     0.0%
kc                  432512    432508    -0.0%
SPASS               412788    412764    -0.0%
pairlocalalign      249460    249416    -0.0%
bullet              475740    475512    -0.0%
7zip-benchmark      568864    568356    -0.1%
consumer-typeset    419088    418648    -0.1%
tramp3d-v4          367628    367224    -0.1%
clamscan            383184    382732    -0.1%
lencod              430028    429284    -0.2%

Geomean difference: -0.1%

Differential Revision: https://reviews.llvm.org/D109528
This commit is contained in:
parent
1ac209ed76
commit
5ec1845cad
|
@ -577,6 +577,14 @@ public:
|
|||
  /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
  bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo);

  // Helpers for reassociation:
  /// Fold a constant addend on the offset side out to the outer offset:
  /// G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS,
                                    BuildFnTy &MatchInfo);
  /// Merge the constant offsets of two chained G_PTR_ADDs:
  /// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1 + C2)
  bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS,
                                          MachineInstr *RHS,
                                          BuildFnTy &MatchInfo);
  /// Move a constant from an inner G_PTR_ADD on the base side to the outer
  /// offset (only when the inner G_PTR_ADD has a single non-debug use):
  /// G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
  bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS,
                                    MachineInstr *RHS, BuildFnTy &MatchInfo);
  /// Reassociate pointer calculations with G_ADD involved, to allow better
  /// addressing mode usage.
  bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
|
||||
|
|
|
@ -4090,9 +4090,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
|
|||
return false;
|
||||
}
|
||||
|
||||
bool CombinerHelper::matchReassocPtrAdd(
|
||||
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
|
||||
assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
|
||||
bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
|
||||
MachineInstr *RHS,
|
||||
BuildFnTy &MatchInfo) {
|
||||
// G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
|
||||
Register Src1Reg = MI.getOperand(1).getReg();
|
||||
if (RHS->getOpcode() != TargetOpcode::G_ADD)
|
||||
return false;
|
||||
auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
|
||||
if (!C2)
|
||||
return false;
|
||||
|
||||
MatchInfo = [=, &MI](MachineIRBuilder &B) {
|
||||
LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
|
||||
|
||||
auto NewBase =
|
||||
Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
|
||||
Observer.changingInstr(MI);
|
||||
MI.getOperand(1).setReg(NewBase.getReg(0));
|
||||
MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
|
||||
Observer.changedInstr(MI);
|
||||
};
|
||||
return !reassociationCanBreakAddressingModePattern(MI);
|
||||
}
|
||||
|
||||
bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
                                                  MachineInstr *LHS,
                                                  MachineInstr *RHS,
                                                  BuildFnTy &MatchInfo) {
  // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
  // if and only if (G_PTR_ADD X, C) has one use.
  // NOTE: the RHS parameter is not used by this helper; it is kept for a
  // signature parallel to the other reassociation helpers.
  Register LHSBase;
  Register LHSCstOff;
  // Match only when the inner ptr_add has a single non-debug use, so mutating
  // it below cannot affect any other user.
  // NOTE(review): m_ICst binding a Register here looks suspect — confirm the
  // matcher overload; a ValueAndVReg-based m_GCst may be intended.
  if (!mi_match(MI.getBaseReg(), MRI,
                m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_ICst(LHSCstOff)))))
    return false;

  auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    // When we change LHSPtrAdd's offset register we might cause it to use a
    // reg before its def. Sink the instruction right before the outer PTR_ADD
    // to ensure this doesn't happen.
    LHSPtrAdd->moveBefore(&MI);
    Register RHSReg = MI.getOffsetReg();
    // Outer ptr_add now adds the constant C ...
    Observer.changingInstr(MI);
    MI.getOperand(2).setReg(LHSCstOff);
    Observer.changedInstr(MI);
    // ... and the inner ptr_add adds Y instead of C.
    Observer.changingInstr(*LHSPtrAdd);
    LHSPtrAdd->getOperand(2).setReg(RHSReg);
    Observer.changedInstr(*LHSPtrAdd);
  };
  // Only do the transform if it doesn't defeat a legal addressing mode.
  return !reassociationCanBreakAddressingModePattern(MI);
}
|
||||
|
||||
bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
                                                        MachineInstr *LHS,
                                                        MachineInstr *RHS,
                                                        BuildFnTy &MatchInfo) {
  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  // NOTE: the RHS parameter is not used by this helper; it is kept for a
  // signature parallel to the other reassociation helpers.
  auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
  if (!LHSPtrAdd)
    return false;

  // Both the inner and the outer offsets must be known constants.
  Register Src2Reg = MI.getOperand(2).getReg();
  Register LHSSrc1 = LHSPtrAdd->getBaseReg();
  Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
  auto C1 = getConstantVRegVal(LHSSrc2, MRI);
  if (!C1)
    return false;
  auto C2 = getConstantVRegVal(Src2Reg, MRI);
  if (!C2)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    // Rewrite MI in place as BASE + (C1 + C2).
    auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(LHSSrc1);
    MI.getOperand(2).setReg(NewCst.getReg(0));
    Observer.changedInstr(MI);
  };
  // Only do the transform if it doesn't defeat a legal addressing mode.
  return !reassociationCanBreakAddressingModePattern(MI);
}
|
||||
|
||||
bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
                                        BuildFnTy &MatchInfo) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  // We're trying to match a few pointer computation patterns here for
  // re-association opportunities.
  // 1) Isolating a constant operand to be on the RHS, e.g.:
  //    G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  //
  // 2) Folding two constants in each sub-tree as long as such folding
  //    doesn't break a legal addressing mode.
  //    G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  //
  // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
  //    G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
  //    iff (G_PTR_ADD X, C) has one use.
  MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
  MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());

  // Try to match example 2.
  if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
    return true;

  // Try to match example 3.
  if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
    return true;

  // Try to match example 1.
  if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
    return true;

  return false;
}
|
||||
|
||||
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
|
||||
|
|
|
@ -184,3 +184,82 @@ body: |
|
|||
G_STORE %ptr_to_int(s64), %10(p0) :: (store 8)
|
||||
$w0 = COPY %7(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
# The inner G_PTR_ADD (%9 = %2 + 40) has a single use, so the constant 40 is
# expected to be moved out to the outer G_PTR_ADD's offset.
name:            reassoc_cst_inner_lhs
alignment:       4
tracksRegLiveness: true
liveins:
  - { reg: '$w0' }
  - { reg: '$x1' }
  - { reg: '$x2' }
  - { reg: '$x3' }
body:             |
  bb.1:
    liveins: $w0, $x1, $x2, $x3

    ; CHECK-LABEL: name: reassoc_cst_inner_lhs
    ; CHECK: liveins: $w0, $x1, $x2, $x3
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64)
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SHL]](s64)
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: RET_ReallyLR
    %1:_(p0) = COPY $x1
    %2:_(p0) = COPY $x2
    %3:_(s64) = COPY $x3
    %8:_(s64) = G_CONSTANT i64 40
    %9:_(p0) = G_PTR_ADD %2, %8(s64)
    %10:_(s64) = G_CONSTANT i64 2
    %11:_(s64) = G_SHL %3, %10
    %12:_(p0) = G_PTR_ADD %9, %11(s64)
    %14:_(s32) = G_LOAD %12(p0) :: (load (s32))
    $w0 = COPY %14
    RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
# Negative test: the inner G_PTR_ADD (%9 = %2 + 40) has a second use
# ($x0 = COPY %9), so the constant must NOT be reassociated outward and the
# original inner add is expected to survive.
name:            reassoc_cst_inner_lhs_multiuse
alignment:       4
tracksRegLiveness: true
liveins:
  - { reg: '$w0' }
  - { reg: '$x1' }
  - { reg: '$x2' }
  - { reg: '$x3' }
body:             |
  bb.1:
    liveins: $w0, $x1, $x2, $x3

    ; CHECK-LABEL: name: reassoc_cst_inner_lhs_multiuse
    ; CHECK: liveins: $w0, $x1, $x2, $x3
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64)
    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[SHL]](s64)
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
    ; CHECK: $w0 = COPY [[LOAD]](s32)
    ; CHECK: $x0 = COPY [[PTR_ADD]](p0)
    ; CHECK: RET_ReallyLR
    %1:_(p0) = COPY $x1
    %2:_(p0) = COPY $x2
    %3:_(s64) = COPY $x3
    %8:_(s64) = G_CONSTANT i64 40
    %9:_(p0) = G_PTR_ADD %2, %8(s64)
    %10:_(s64) = G_CONSTANT i64 2
    %11:_(s64) = G_SHL %3, %10
    %12:_(p0) = G_PTR_ADD %9, %11(s64)
    %14:_(s32) = G_LOAD %12(p0) :: (load (s32))
    $w0 = COPY %14
    $x0 = COPY %9
    RET_ReallyLR
|
||||
|
||||
...
|
||||
|
|
|
@ -336,32 +336,22 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(
|
|||
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
|
||||
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0x400
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
|
||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s5
|
||||
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0x400
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s5
|
||||
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
|
||||
; GFX7-NEXT: s_endpgm
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset
|
||||
|
@ -433,25 +423,27 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace
|
|||
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX6-NEXT: s_add_u32 s0, s2, 0x3ffc
|
||||
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
|
||||
; GFX6-NEXT: s_addc_u32 s1, s3, 0
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
|
||||
; GFX6-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX7-NEXT: s_add_u32 s0, s2, 0x3ffc
|
||||
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
|
||||
; GFX7-NEXT: s_addc_u32 s1, s3, 0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
|
||||
; GFX7-NEXT: s_endpgm
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset
|
||||
|
@ -790,31 +782,21 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspac
|
|||
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
|
||||
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0x400
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
|
||||
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s5
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0x400
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s5
|
||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s5
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
|
||||
|
@ -887,24 +869,26 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspa
|
|||
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX6-NEXT: s_add_u32 s4, s2, 0x3ffc
|
||||
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_addc_u32 s5, s3, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX7-NEXT: s_add_u32 s4, s2, 0x3ffc
|
||||
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
|
||||
; GFX7-NEXT: s_mov_b32 s6, 0
|
||||
; GFX7-NEXT: s_addc_u32 s5, s3, 0
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
|
||||
|
|
Loading…
Reference in New Issue