AMDGPU: Make i64 loads/stores promote to v2i32
Now that unaligned access expansion should not attempt to produce i64 accesses, we can remove the hack in PreprocessISelDAG where this was done. This allows splitting i64 private accesses while still allowing the new add nodes that index the vector components to be folded into the base pointer arithmetic.

llvm-svn: 268293
parent 5002a67ef2
commit 2b957b5a6f
@@ -1586,61 +1586,6 @@ void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
      }
    }
  }

  // XXX - Other targets seem to be able to do this without a worklist.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patterns, we replace all i64 loads with
      // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
      // during DAG legalization; however, some places in the DAG legalizer
      // (ExpandUnalignedLoad) assume that i64 is legal, so doing this
      // promotion early can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
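For context, a minimal sketch (illustration only, not code from this commit) of the i64-to-v2i32 load rewrite the generic DAG legalizer performs once ISD::LOAD on MVT::i64 is marked Promote with v2i32 as the promoted type, which is what lets the hand-written worklist above go away. The helper name is hypothetical, and it assumes a SelectionDAG `DAG` and a simple, non-extending i64 load:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustration only: roughly what promotion does for an i64 load.
// Assumes `LD` is a simple, non-extending i64 load living in `DAG`.
static void promoteI64LoadToV2I32(SelectionDAG &DAG, LoadSDNode *LD) {
  SDLoc SL(LD);
  // Re-issue the load as two i32 lanes over the same memory...
  SDValue NewLoad = DAG.getLoad(MVT::v2i32, SL, LD->getChain(),
                                LD->getBasePtr(), LD->getMemOperand());
  // ...and reinterpret the pair as the original i64 value.
  SDValue AsI64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, NewLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1)); // chain
  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), AsI64);               // value
}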
@@ -95,12 +95,24 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v16i32, Custom);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::i64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v8i32, Custom);
  setOperationAction(ISD::STORE, MVT::v16i32, Custom);

  setOperationAction(ISD::STORE, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::i64, Promote);
  AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);

  setOperationAction(ISD::SELECT, MVT::i64, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Promote);
  AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
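A rough sketch of how these entries are consumed (illustration only; the function name is hypothetical and the header path is assumed to match the tree layout of this era): the generic legalizer queries the operation-action tables, sees Promote with a promoted type of v2i32, and rewrites i64 loads and stores itself, which is what makes the PreprocessISelDAG worklist above unnecessary.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Illustration only: how the Promote entries above read back out of the
// action tables for an initialized SITargetLowering instance `TLI`.
static bool promotesI64MemAccesses(const TargetLowering &TLI) {
  return TLI.getOperationAction(ISD::LOAD, MVT::i64) ==
             TargetLoweringBase::Promote &&
         TLI.getTypeToPromoteTo(ISD::LOAD, MVT::i64) == MVT::v2i32 &&
         TLI.getOperationAction(ISD::STORE, MVT::i64) ==
             TargetLoweringBase::Promote &&
         TLI.getTypeToPromoteTo(ISD::STORE, MVT::i64) == MVT::v2i32;
}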
@@ -14,7 +14,7 @@ define void @materialize_0_i32(i32 addrspace(1)* %out) {

; GCN-LABEL: {{^}}materialize_0_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_0_i64(i64 addrspace(1)* %out) {
  store i64 0, i64 addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @materialize_neg1_i32(i32 addrspace(1)* %out) {

; GCN-LABEL: {{^}}materialize_neg1_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], -1{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_neg1_i64(i64 addrspace(1)* %out) {
  store i64 -1, i64 addrspace(1)* %out
@@ -172,7 +172,7 @@ define void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val)

; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
@@ -225,7 +225,7 @@ define void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}

; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
@@ -238,7 +238,7 @@ define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1
; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}

; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
@@ -510,7 +510,7 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {

; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64:
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], v[[LO_VREG]]{{$}}
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
  store double 0.0, double addrspace(1)* %out
@@ -4,12 +4,11 @@
; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}

; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -27,12 +26,11 @@ define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}

; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -50,12 +48,11 @@ define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -73,12 +70,11 @@ define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {