AMDGPU: Make i64 loads/stores promote to v2i32

Now that unaligned access expansion should not attempt
to produce i64 accesses, we can remove the hack in
PreprocessISelDAG where this is done.

This allows splitting i64 private accesses while
allowing the new add nodes indexing the vector components
can be folded with the base pointer arithmetic.

llvm-svn: 268293
This commit is contained in:
Matt Arsenault 2016-05-02 20:07:26 +00:00
parent 5002a67ef2
commit 2b957b5a6f
6 changed files with 26 additions and 73 deletions

View File

@ -1586,61 +1586,6 @@ void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
} }
} }
} }
// XXX - Other targets seem to be able to do this without a worklist.
SmallVector<LoadSDNode *, 8> LoadsToReplace;
SmallVector<StoreSDNode *, 8> StoresToReplace;
for (SDNode &Node : CurDAG->allnodes()) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
EVT VT = LD->getValueType(0);
if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
continue;
// To simplify the TableGen patters, we replace all i64 loads with v2i32
// loads. Alternatively, we could promote i64 loads to v2i32 during DAG
// legalization, however, so places (ExpandUnalignedLoad) in the DAG
// legalizer assume that if i64 is legal, so doing this promotion early
// can cause problems.
LoadsToReplace.push_back(LD);
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
// Handle i64 stores here for the same reason mentioned above for loads.
SDValue Value = ST->getValue();
if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
continue;
StoresToReplace.push_back(ST);
}
}
for (LoadSDNode *LD : LoadsToReplace) {
SDLoc SL(LD);
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
LD->getBasePtr(), LD->getMemOperand());
SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
MVT::i64, NewLoad);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
Modified = true;
}
for (StoreSDNode *ST : StoresToReplace) {
SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
MVT::v2i32, ST->getValue());
const SDValue StoreOps[] = {
ST->getChain(),
NewValue,
ST->getBasePtr(),
ST->getOffset()
};
CurDAG->UpdateNodeOperands(ST, StoreOps);
Modified = true;
}
// XXX - Is this necessary?
if (Modified)
CurDAG->RemoveDeadNodes();
} }
void AMDGPUDAGToDAGISel::PostprocessISelDAG() { void AMDGPUDAGToDAGISel::PostprocessISelDAG() {

View File

@ -95,12 +95,24 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v8i32, Custom); setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom); setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
setOperationAction(ISD::LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v8i32, Custom); setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom); setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::i64, Promote);
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote); setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);

View File

@ -14,7 +14,7 @@ define void @materialize_0_i32(i32 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_0_i64: ; GCN-LABEL: {{^}}materialize_0_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}} ; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_0_i64(i64 addrspace(1)* %out) { define void @materialize_0_i64(i64 addrspace(1)* %out) {
store i64 0, i64 addrspace(1)* %out store i64 0, i64 addrspace(1)* %out
@ -31,7 +31,7 @@ define void @materialize_neg1_i32(i32 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_neg1_i64: ; GCN-LABEL: {{^}}materialize_neg1_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}} ; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], -1{{$}} ; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}} ; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_neg1_i64(i64 addrspace(1)* %out) { define void @materialize_neg1_i64(i64 addrspace(1)* %out) {
store i64 -1, i64 addrspace(1)* %out store i64 -1, i64 addrspace(1)* %out

View File

@ -172,7 +172,7 @@ define void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val)
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64: ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 { define void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0.0) %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
@ -225,7 +225,7 @@ define void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}} ; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 { define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
@ -238,7 +238,7 @@ define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1
; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}} ; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 { define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))

View File

@ -510,7 +510,7 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64: ; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64:
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0 ; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0 ; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], v[[LO_VREG]]{{$}}
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
store double 0.0, double addrspace(1)* %out store double 0.0, double addrspace(1)* %out

View File

@ -4,12 +4,11 @@
; GCN-LABEL: {{^}}v_uextract_bit_31_i128: ; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@ -27,12 +26,11 @@ define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
; GCN-LABEL: {{^}}v_uextract_bit_63_i128: ; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@ -50,12 +48,11 @@ define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
; GCN-LABEL: {{^}}v_uextract_bit_95_i128: ; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@ -73,12 +70,11 @@ define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
; GCN-LABEL: {{^}}v_uextract_bit_127_i128: ; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm ; GCN: s_endpgm
define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {