forked from OSchip/llvm-project
[AMDGPU] Corrected computeKnownBits for V_PERM_B32
Differential Revision: https://reviews.llvm.org/D48133
llvm-svn: 334640
This commit is contained in:
parent
2d28383097
commit
7bec57300c
|
@ -4317,18 +4317,19 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
|
|||
unsigned Sel = CMask->getZExtValue();
|
||||
|
||||
for (unsigned I = 0; I < 32; I += 8) {
|
||||
unsigned ByteMask = 0xff << I;
|
||||
unsigned SelBits = Sel & 0xff;
|
||||
if (SelBits < 4) {
|
||||
Known.One |= RHSKnown.One & ByteMask;
|
||||
Known.Zero |= RHSKnown.Zero & ByteMask;
|
||||
SelBits *= 8;
|
||||
Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
|
||||
Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
|
||||
} else if (SelBits < 7) {
|
||||
Known.One |= LHSKnown.One & ByteMask;
|
||||
Known.Zero |= LHSKnown.Zero & ByteMask;
|
||||
SelBits = (SelBits & 3) * 8;
|
||||
Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
|
||||
Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
|
||||
} else if (SelBits == 0x0c) {
|
||||
Known.Zero |= ByteMask;
|
||||
Known.Zero |= 0xff << I;
|
||||
} else if (SelBits > 0x0c) {
|
||||
Known.One |= ByteMask;
|
||||
Known.One |= 0xff << I;
|
||||
}
|
||||
Sel >>= 8;
|
||||
}
|
||||
|
|
|
@ -196,4 +196,26 @@ bb:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}known_ffff8004:
|
||||
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
|
||||
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
|
||||
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
|
||||
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
|
||||
; Test kernel for known-bits propagation through a byte-wise merge of two
; values. It builds a 32-bit word whose upper 16 bits are all ones, whose
; byte 1 comes from the loaded word (with bit 15 forced on) and whose byte 0
; comes from %arg1 (with bit 2 forced on), then masks that word with
; 0xffff8004. Every bit kept by the mask is already known to be one, so the
; masked value is the constant 0xffff8004; the check lines preceding this
; function expect that constant to be materialized directly for the second
; store, alongside a v_perm_b32 producing the unmasked word.
define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
|
||||
bb:
|
||||
; Index the buffer by the value returned from the workitem.id.x intrinsic.
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
|
||||
%load = load i32, i32 addrspace(1)* %gep, align 4
|
||||
; Force bit 2 on in the kernel argument.
%mask1 = or i32 %arg1, 4
|
||||
; Force bit 15 on in the loaded word.
%mask2 = or i32 %load, 32768 ; 0x8000
|
||||
; Keep only bytes 0 and 2 of the argument-derived value.
%and = and i32 %mask1, 16711935 ; 0x00ff00ff
|
||||
; Clear byte 0 of the load-derived value, keeping bytes 1..3.
%tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
|
||||
; Set the upper 16 bits to all ones.
%tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
|
||||
; Merge: byte 0 from %and, byte 1 from %tmp2, bytes 2..3 are 0xff.
%tmp3 = or i32 %tmp2, %and
|
||||
; Store the unmasked merged word back to the indexed element.
store i32 %tmp3, i32 addrspace(1)* %gep, align 4
|
||||
; Only bits 31..16, 15 and 2 survive this mask, and each was forced to one
; above, so %v does not depend on %load or %arg1.
%v = and i32 %tmp3, 4294934532 ; 0xffff8004
|
||||
; Store the masked value through the base pointer %arg.
store i32 %v, i32 addrspace(1)* %arg, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
Loading…
Reference in New Issue