forked from OSchip/llvm-project
DAGCombiner: Reduce 64-bit BFE pattern to pattern on 32-bit component
If the extracted bits are restricted to the upper half or lower half, this can be truncated. llvm-svn: 267024
This commit is contained in:
parent
a98c7ead30
commit
8d1052f55c
|
@ -2969,6 +2969,50 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
|
|||
}
|
||||
}
|
||||
|
||||
// Reduce bit extract of low half of an integer to the narrower type.
|
||||
// (and (srl i64:x, K), KMask) ->
|
||||
// (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
|
||||
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
|
||||
if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
|
||||
if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
||||
unsigned Size = VT.getSizeInBits();
|
||||
const APInt &AndMask = CAnd->getAPIntValue();
|
||||
unsigned ShiftBits = CShift->getZExtValue();
|
||||
unsigned MaskBits = AndMask.countTrailingOnes();
|
||||
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
|
||||
|
||||
if (APIntOps::isMask(AndMask) &&
|
||||
// Required bits must not span the two halves of the integer and
|
||||
// must fit in the half size type.
|
||||
(ShiftBits + MaskBits <= Size / 2) &&
|
||||
TLI.isNarrowingProfitable(VT, HalfVT) &&
|
||||
TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
|
||||
TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
|
||||
TLI.isTruncateFree(VT, HalfVT) &&
|
||||
TLI.isZExtFree(HalfVT, VT)) {
|
||||
// The isNarrowingProfitable check is to avoid regressions on PPC and
|
||||
// AArch64 which match a few 64-bit bit insert / bit extract patterns
|
||||
// on downstream users of this. Those patterns could probably be
|
||||
// extended to handle extensions mixed in.
|
||||
|
||||
SDValue SL(N0);
|
||||
assert(ShiftBits != 0 && MaskBits <= Size);
|
||||
|
||||
// The extracted field lies entirely within the low half, so do the shift
// and mask in the half-width type and zero-extend the result.
|
||||
EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
|
||||
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
|
||||
N0.getOperand(0));
|
||||
|
||||
SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
|
||||
SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
|
||||
SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
|
||||
SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ define i32 @bar(i64 %cav1.coerce) nounwind {
|
|||
|
||||
define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp {
|
||||
; CHECK-LABEL: fct1:
|
||||
; CHECK: ubfx
|
||||
; CHECK: ubfx x{{[0-9]+}}, x{{[0-9]+}}
|
||||
; CHECK-NOT: and
|
||||
; CHECK: ret
|
||||
|
||||
|
|
|
@ -223,12 +223,10 @@ ret:
|
|||
|
||||
; GCN: s_cbranch_vccnz BB4_2
|
||||
|
||||
; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 15
|
||||
; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
|
||||
|
||||
; GCN: BB4_2:
|
||||
; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 15
|
||||
; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f
|
||||
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
|
||||
|
||||
; GCN: BB4_3:
|
||||
; GCN: buffer_store_dwordx2
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Extract the high bit of the 1st quarter
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i128, i128 addrspace(1)* %in.gep
|
||||
%srl = lshr i128 %ld.64, 31
|
||||
%bit = and i128 %srl, 1
|
||||
store i128 %bit, i128 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; Extract the high bit of the 2nd quarter
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i128, i128 addrspace(1)* %in.gep
|
||||
%srl = lshr i128 %ld.64, 63
|
||||
%bit = and i128 %srl, 1
|
||||
store i128 %bit, i128 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; Extract the high bit of the 3rd quarter
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i128, i128 addrspace(1)* %in.gep
|
||||
%srl = lshr i128 %ld.64, 95
|
||||
%bit = and i128 %srl, 1
|
||||
store i128 %bit, i128 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; Extract the high bit of the 4th quarter
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i128, i128 addrspace(1)* %in.gep
|
||||
%srl = lshr i128 %ld.64, 127
|
||||
%bit = and i128 %srl, 1
|
||||
store i128 %bit, i128 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; Spans more than 2 dword boundaries
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_34_100_i128:
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[VAL2:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; GCN: buffer_load_dword v[[VAL1:[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
|
||||
; GCN-DAG: v_lshl_b64 v{{\[}}[[SHLLO:[0-9]+]]:[[SHLHI:[0-9]+]]{{\]}}, v{{\[}}[[VAL2]]:[[VAL3]]{{\]}}, 30
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[ELT1PART:[0-9]+]], 2, v[[VAL1]]
|
||||
; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]]
|
||||
; GCN-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], 0, v[[SHLHI]]{{$}}
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ELT2PART]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[OR0]]:[[OR1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i128, i128 addrspace(1)* %in.gep
|
||||
%srl = lshr i128 %ld.64, 34
|
||||
%bit = and i128 %srl, 73786976294838206463
|
||||
store i128 %bit, i128 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
|
@ -0,0 +1,386 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.
|
||||
|
||||
; Extract the high bit of the low half
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_31_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 31
|
||||
%bit = and i64 %srl, 1
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; Extract the high bit of the high half
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_63_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 63
|
||||
%bit = and i64 %srl, 1
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_1_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 1
|
||||
%bit = and i64 %srl, 1
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_20_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 20
|
||||
%bit = and i64 %srl, 1
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_32_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 32
|
||||
%bit = and i64 %srl, 1
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_33_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 33
|
||||
%bit = and i64 %srl, 1
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_20_21_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 20
|
||||
%bit = and i64 %srl, 3
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_1_30_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 1
|
||||
%bit = and i64 %srl, 1073741823
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_1_31_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 1
|
||||
%bit = and i64 %srl, 2147483647
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; Spans the dword boundary, so requires full shift
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
|
||||
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
|
||||
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 31
|
||||
%bit = and i64 %srl, 3
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 33
|
||||
%bit = and i64 %srl, 3
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
|
||||
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
|
||||
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 30
|
||||
%bit = and i64 %srl, 1073741823
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 33
|
||||
%bit = and i64 %srl, 1073741823
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
|
||||
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
|
||||
; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
|
||||
define void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 31
|
||||
%and = and i64 %srl, 4294967295
|
||||
store i64 %and, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; trunc applied before and mask
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_31_i64_trunc_i32:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
; GCN: buffer_store_dword v[[SHIFT]]
|
||||
define void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 31
|
||||
%trunc = trunc i64 %srl to i32
|
||||
%bit = and i32 %trunc, 1
|
||||
store i32 %bit, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_3_i64_trunc_i32:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
|
||||
; GCN: buffer_store_dword [[BFE]]
|
||||
define void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 3
|
||||
%trunc = trunc i64 %srl to i32
|
||||
%bit = and i32 %trunc, 1
|
||||
store i32 %bit, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_33_i64_trunc_i32:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
|
||||
; GCN: buffer_store_dword [[BFE]]
|
||||
define void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 33
|
||||
%trunc = trunc i64 %srl to i32
|
||||
%bit = and i32 %trunc, 1
|
||||
store i32 %bit, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
|
||||
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
|
||||
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
|
||||
; GCN-NOT: v[[SHRLO]]
|
||||
; GCN: buffer_store_dword v[[SHRLO]]
|
||||
define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 31
|
||||
%trunc = trunc i64 %srl to i32
|
||||
%bit = and i32 %trunc, 3
|
||||
store i32 %bit, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}and_not_mask_i64:
|
||||
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 20
|
||||
; GCN-DAG: v_and_b32_e32 v[[SHRLO]], 4, v[[SHRLO]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
|
||||
; GCN-NOT: v[[SHRLO]]
|
||||
; GCN-NOT: v[[SHRHI]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
|
||||
define void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 20
|
||||
%bit = and i64 %srl, 4
|
||||
store i64 %bit, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; The instruction count is the same with/without hasOneUse, but
|
||||
; keeping the 32-bit 'and' instruction has a smaller encoding size than the bfe.
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
|
||||
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
|
||||
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 27
|
||||
%bit = and i64 %srl, 3
|
||||
store volatile i64 %srl, i64 addrspace(1)* %out
|
||||
store volatile i64 %bit, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO]]{{\]}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
define void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 34
|
||||
%bit = and i64 %srl, 7
|
||||
store volatile i64 %srl, i64 addrspace(1)* %out
|
||||
store volatile i64 %bit, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
|
||||
; GCN: buffer_store_dword v[[ZERO]]
|
||||
define void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
%out0.gep = getelementptr i64, i64 addrspace(1)* %out0, i32 %id.x
|
||||
%out1.gep = getelementptr i32, i32 addrspace(1)* %out1, i32 %id.x
|
||||
%ld.64 = load i64, i64 addrspace(1)* %in.gep
|
||||
%srl = lshr i64 %ld.64, 33
|
||||
%bit = and i64 %srl, 7
|
||||
store volatile i64 %bit, i64 addrspace(1)* %out0.gep
|
||||
|
||||
%srl.srl32 = lshr i64 %srl, 32
|
||||
%srl.hi = trunc i64 %srl.srl32 to i32
|
||||
store volatile i32 %srl.hi, i32 addrspace(1)* %out1.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
|
@ -311,7 +311,7 @@ define i64 @bextr64b(i64 %x) uwtable ssp {
|
|||
; CHECK-LABEL: bextr64b:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
|
||||
; CHECK-NEXT: bextrq %rax, %rdi, %rax
|
||||
; CHECK-NEXT: bextrl %eax, %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%1 = lshr i64 %x, 4
|
||||
|
@ -323,7 +323,7 @@ define i64 @bextr64b_load(i64* %x) {
|
|||
; CHECK-LABEL: bextr64b_load:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
|
||||
; CHECK-NEXT: bextrq %rax, (%rdi), %rax
|
||||
; CHECK-NEXT: bextrl %eax, (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%1 = load i64, i64* %x, align 8
|
||||
|
|
Loading…
Reference in New Issue