diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 015cc8f0b20a..8d78bcf582d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1586,61 +1586,6 @@ void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
       }
     }
   }
-
-  // XXX - Other targets seem to be able to do this without a worklist.
-  SmallVector<LoadSDNode *, 8> LoadsToReplace;
-  SmallVector<StoreSDNode *, 8> StoresToReplace;
-
-  for (SDNode &Node : CurDAG->allnodes()) {
-    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
-      EVT VT = LD->getValueType(0);
-      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
-        continue;
-
-      // To simplify the TableGen patterns, we replace all i64 loads with
-      // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
-      // during DAG legalization; however, some places (ExpandUnalignedLoad)
-      // in the DAG legalizer assume that i64 is legal, so doing this
-      // promotion early can cause problems.
-      LoadsToReplace.push_back(LD);
-    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
-      // Handle i64 stores here for the same reason mentioned above for loads.
-      SDValue Value = ST->getValue();
-      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
-        continue;
-      StoresToReplace.push_back(ST);
-    }
-  }
-
-  for (LoadSDNode *LD : LoadsToReplace) {
-    SDLoc SL(LD);
-
-    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
-                                      LD->getBasePtr(), LD->getMemOperand());
-    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
-                                      MVT::i64, NewLoad);
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
-    Modified = true;
-  }
-
-  for (StoreSDNode *ST : StoresToReplace) {
-    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
-                                       MVT::v2i32, ST->getValue());
-    const SDValue StoreOps[] = {
-      ST->getChain(),
-      NewValue,
-      ST->getBasePtr(),
-      ST->getOffset()
-    };
-
-    CurDAG->UpdateNodeOperands(ST, StoreOps);
-    Modified = true;
-  }
-
-  // XXX - Is this necessary?
-  if (Modified)
-    CurDAG->RemoveDeadNodes();
 }
 
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a79f689907ef..ec0a032fbf1d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -95,12 +95,24 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
   setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
 
+  setOperationAction(ISD::LOAD, MVT::f64, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);
+
+  setOperationAction(ISD::LOAD, MVT::i64, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+
   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
 
   setOperationAction(ISD::STORE, MVT::i1, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
 
+  setOperationAction(ISD::STORE, MVT::f64, Promote);
+  AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);
+
+  setOperationAction(ISD::STORE, MVT::i64, Promote);
+  AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+
   setOperationAction(ISD::SELECT, MVT::i64, Custom);
   setOperationAction(ISD::SELECT, MVT::f64, Promote);
   AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll b/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
index 0d72adc3a8fb..150e3430a5e9 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
@@ -14,7 +14,7 @@ define void @materialize_0_i32(i32 addrspace(1)* %out) {
 
 ; GCN-LABEL: {{^}}materialize_0_i64:
 ; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
 define void @materialize_0_i64(i64 addrspace(1)* %out) {
   store i64 0, i64 addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @materialize_neg1_i32(i32 addrspace(1)* %out) {
 
 ; GCN-LABEL: {{^}}materialize_neg1_i64:
 ; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
-; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], -1{{$}}
+; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
 define void @materialize_neg1_i64(i64 addrspace(1)* %out) {
   store i64 -1, i64 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
index 56b446056cf7..a67e54d81ce5 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -172,7 +172,7 @@ define void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val)
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
@@ -225,7 +225,7 @@ define void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
 ; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
 
 ; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
@@ -238,7 +238,7 @@ define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1
 ; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
 
 ; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll
index 298cb419e4f4..674eceee8122 100644
--- a/llvm/test/CodeGen/AMDGPU/imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm.ll
@@ -510,7 +510,7 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
 
 ; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64:
 ; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
-; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], v[[LO_VREG]]{{$}}
 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
 define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
   store double 0.0, double addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index 60e54b90132e..ea7357d78a80 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -4,12 +4,11 @@
 
 ; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
 
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN: s_endpgm
 define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -27,12 +26,11 @@ define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
 
 ; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
 
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN: s_endpgm
 define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -50,12 +48,11 @@ define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
 
 ; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
 
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN: s_endpgm
 define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -73,12 +70,11 @@ define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
 
 ; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
 ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
 
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN: s_endpgm
 define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
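
In effect, this change retires the hand-rolled PreprocessISelDAG worklist in favor of the generic legalizer: marking the i64 and f64 LOAD/STORE actions as Promote with a promoted type of v2i32 asks the legalizer to perform the same rewrite the deleted code did by hand. Below is a minimal sketch of that rewrite for loads, reusing only the SelectionDAG calls from the deleted loop; the helper name promoteI64Load is illustrative, not part of the patch or of the legalizer's API.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Rewrite an i64 load as a v2i32 load plus a bitcast back to i64,
// mirroring what the deleted PreprocessISelDAG code did explicitly.
static void promoteI64Load(SelectionDAG &DAG, LoadSDNode *LD) {
  SDLoc SL(LD);
  // Same chain, address, and memory operand; only the value type changes.
  SDValue NewLoad = DAG.getLoad(MVT::v2i32, SL, LD->getChain(),
                                LD->getBasePtr(), LD->getMemOperand());
  // Reinterpret the two 32-bit halves as the original i64 value.
  SDValue BitCast = DAG.getNode(ISD::BITCAST, SL, MVT::i64, NewLoad);
  // Redirect the old load's chain and value uses to the new nodes.
  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
}

One visible payoff shows up in the test updates above: once a 64-bit constant is built as a v2i32 value with two identical halves, the second v_mov_b32 can copy the low register (v[[HI]], v[[LO]]) instead of materializing the same immediate a second time.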