forked from OSchip/llvm-project
[AMDGPU] Constant fold Intrinsic::amdgcn_perm
Differential Revision: https://reviews.llvm.org/D102203
This commit is contained in:
parent
0077dce361
commit
22d295f695
|
@ -1470,6 +1470,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
|
|||
case Intrinsic::vector_reduce_umin:
|
||||
case Intrinsic::vector_reduce_umax:
|
||||
// Target intrinsics
|
||||
case Intrinsic::amdgcn_perm:
|
||||
case Intrinsic::arm_mve_vctp8:
|
||||
case Intrinsic::arm_mve_vctp16:
|
||||
case Intrinsic::arm_mve_vctp32:
|
||||
|
@ -2702,6 +2703,46 @@ static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
|
|||
}
|
||||
}
|
||||
|
||||
/// Try to constant fold a call to llvm.amdgcn.perm.
///
/// \p Operands holds the three call arguments: two 32-bit data sources followed
/// by the selector. \p Ty is the type of the folded result.
///
/// Each byte of the selector picks one byte of the result:
///   - 0..3   : byte (code & 3) of the second operand
///   - 4..7   : byte (code & 3) of the first operand
///   - 8..9   : bit 15 (code 8) or bit 31 (code 9) of the second operand,
///              replicated across the byte
///   - 10..11 : bit 15 (code 10) or bit 31 (code 11) of the first operand,
///              replicated across the byte
///   - 12     : 0x00
///   - >= 13  : 0xff
///
/// \returns nullptr when getConstIntOrUndef rejects any operand; an undef value
/// when the selector has no integer value, or when all four result bytes would
/// have to be read from a source that has no integer value; otherwise the
/// assembled 32-bit integer constant.
static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
                                                 Type *Ty) {
  const APInt *Data0, *Data1, *Selector;
  const bool AllFoldable = getConstIntOrUndef(Operands[0], Data0) &&
                           getConstIntOrUndef(Operands[1], Data1) &&
                           getConstIntOrUndef(Operands[2], Selector);
  if (!AllFoldable)
    return nullptr;

  // NOTE(review): a null APInt pointer presumably means the operand was undef;
  // confirm against getConstIntOrUndef, which is defined outside this view.
  if (Selector == nullptr)
    return UndefValue::get(Ty);

  APInt Folded(32, 0);
  unsigned UnknownBytes = 0;
  for (unsigned ByteIdx = 0; ByteIdx != 4; ++ByteIdx) {
    const unsigned BitPos = ByteIdx * 8;
    const unsigned Code = Selector->extractBitsAsZExtValue(8, BitPos);
    // Zero is also the final value for code 12 and for bytes whose source is
    // unavailable.
    unsigned ByteVal = 0;

    if (Code > 12) {
      ByteVal = 0xff;
    } else if (Code != 12) {
      // Codes 4..7 and 10..11 read the first operand; 0..3 and 8..9 read the
      // second one.
      const bool UseFirst = (Code & 12) == 4 || (Code & 10) == 10;
      const APInt *Picked = UseFirst ? Data0 : Data1;
      if (Picked == nullptr) {
        ++UnknownBytes;
      } else if (Code >= 8) {
        // Replicate a single bit over the whole byte: bit 31 for odd codes,
        // bit 15 for even ones.
        const unsigned SignPos = (Code & 1) ? 31 : 15;
        ByteVal = Picked->extractBitsAsZExtValue(1, SignPos) ? 0xff : 0x00;
      } else {
        ByteVal = Picked->extractBitsAsZExtValue(8, (Code & 3) * 8);
      }
    }

    Folded.insertBits(ByteVal, BitPos, 8);
  }

  // Nothing concrete was produced if every byte came from an unknown source.
  if (UnknownBytes == 4)
    return UndefValue::get(Ty);

  return ConstantInt::get(Ty, Folded);
}
|
||||
|
||||
static Constant *ConstantFoldScalarCall3(StringRef Name,
|
||||
Intrinsic::ID IntrinsicID,
|
||||
Type *Ty,
|
||||
|
@ -2817,6 +2858,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
|
|||
return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
|
||||
}
|
||||
|
||||
if (IntrinsicID == Intrinsic::amdgcn_perm)
|
||||
return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -instsimplify -S | FileCheck %s
|
||||
|
||||
declare i32 @llvm.amdgcn.perm(i32, i32, i32)
|
||||
|
||||
; src1 = 0x19203a4b (421542475), src2 = 0x5c6d7e8f (1550679695)
|
||||
; Exercises folding of llvm.amdgcn.perm calls whose operands are all integer
; constants or undef. Every call result is stored (volatile) through %p, and
; the CHECK-NEXT lines list the expected stored values in call order.
; Value names encode the operands: s1/s2 mean the src1/src2 constants are
; passed, u1/u2 mean undef is passed in that position, and the trailing hex
; number is the selector (third operand).
define void @test(i32* %p) {
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: store volatile i32 undef, i32* [[P:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 -1887539876, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 2121096267, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 1262100505, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 1550679695, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 421542475, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 545143439, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 16711935, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 16711935, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 436174336, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 16711680, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 undef, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 421542475, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 1550679695, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 undef, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 143, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 255, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 1550679552, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 75, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 255, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 65535, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 421542400, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 -16776961, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 255, i32* [[P]], align 4
|
||||
; CHECK-NEXT: store volatile i32 -16777216, i32* [[P]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; Undef selector with constant sources.
%s1s2_u = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 undef)
|
||||
store volatile i32 %s1s2_u, i32* %p
|
||||
; Both sources constant; selector bytes all in the range 0x00..0x07.
%s1s2_0x00010203 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 66051)
|
||||
store volatile i32 %s1s2_0x00010203, i32* %p
|
||||
%s1s2_0x01020304 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 16909060)
|
||||
store volatile i32 %s1s2_0x01020304, i32* %p
|
||||
%s1s2_0x04050607 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 67438087)
|
||||
store volatile i32 %s1s2_0x04050607, i32* %p
|
||||
%s1s2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 50462976)
|
||||
store volatile i32 %s1s2_0x03020100, i32* %p
|
||||
%s1s2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 117835012)
|
||||
store volatile i32 %s1s2_0x07060504, i32* %p
|
||||
%s1s2_0x06010500 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 100730112)
|
||||
store volatile i32 %s1s2_0x06010500, i32* %p
|
||||
; Selectors with some or all bytes >= 0x0c, using constant sources and/or
; undef sources.
%s1s2_0x0c0f0c0f = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 202312719)
|
||||
store volatile i32 %s1s2_0x0c0f0c0f, i32* %p
|
||||
%u1u2_0x0c0f0c0f = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 202312719)
|
||||
store volatile i32 %u1u2_0x0c0f0c0f, i32* %p
|
||||
%s1s2_0x070d010c = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 118292748)
|
||||
store volatile i32 %s1s2_0x070d010c, i32* %p
|
||||
%u1u2_0x070d010c = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 118292748)
|
||||
store volatile i32 %u1u2_0x070d010c, i32* %p
|
||||
%s1s2_0x80818283 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 2155971203)
|
||||
store volatile i32 %s1s2_0x80818283, i32* %p
|
||||
%u1u2_0x80818283 = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 2155971203)
|
||||
store volatile i32 %u1u2_0x80818283, i32* %p
|
||||
%u1u2_0x0e0e0e0e = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 235802126)
|
||||
store volatile i32 %u1u2_0x0e0e0e0e, i32* %p
|
||||
; Exactly one source undef, the other constant.
%u1s2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835012)
|
||||
store volatile i32 %u1s2_0x07060504, i32* %p
|
||||
%s1u2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 117835012)
|
||||
store volatile i32 %s1u2_0x07060504, i32* %p
|
||||
%u1s2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 50462976)
|
||||
store volatile i32 %u1s2_0x03020100, i32* %p
|
||||
%s1u2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462976)
|
||||
store volatile i32 %s1u2_0x03020100, i32* %p
|
||||
%u1s2_0x07060500 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835008)
|
||||
store volatile i32 %u1s2_0x07060500, i32* %p
|
||||
%u1s2_0x0706050c = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835020)
|
||||
store volatile i32 %u1s2_0x0706050c, i32* %p
|
||||
%u1s2_0x0706050d = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835021)
|
||||
store volatile i32 %u1s2_0x0706050d, i32* %p
|
||||
%u1s2_0x03020104 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 50462980)
|
||||
store volatile i32 %u1s2_0x03020104, i32* %p
|
||||
%s1u2_0x03020104 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462980)
|
||||
store volatile i32 %s1u2_0x03020104, i32* %p
|
||||
%s1u2_0x0302010c = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462988)
|
||||
store volatile i32 %s1u2_0x0302010c, i32* %p
|
||||
%s1u2_0x0302010e = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462990)
|
||||
store volatile i32 %s1u2_0x0302010e, i32* %p
|
||||
%s1u2_0x03020f0e = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50466574)
|
||||
store volatile i32 %s1u2_0x03020f0e, i32* %p
|
||||
%s1u2_0x07060500 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 117835008)
|
||||
store volatile i32 %s1u2_0x07060500, i32* %p
|
||||
; Selector 0x0b0a0908 with the source constants spelled out in the value name,
; first with both sources constant and then with each source undef in turn.
%_0x81000100_0x01008100_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 2164261120, i32 16810240, i32 185207048)
|
||||
store volatile i32 %_0x81000100_0x01008100_0x0b0a0908, i32* %p
|
||||
%_u1_0x01008100_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 undef, i32 16810240, i32 185207048)
|
||||
store volatile i32 %_u1_0x01008100_0x0b0a0908, i32* %p
|
||||
%_0x81000100_u2_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 2164261120, i32 undef, i32 185207048)
|
||||
store volatile i32 %_0x81000100_u2_0x0b0a0908, i32* %p
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue