From d82f26cc4bc7cb78f7ef327fa43a93e3d0ff9706 Mon Sep 17 00:00:00 2001
From: Amy Kwan
Date: Wed, 24 Jun 2020 15:23:17 -0500
Subject: [PATCH] [PowerPC][Power10] Implement Count Leading/Trailing Zeroes Builtins under bit Mask in LLVM/Clang

This patch implements builtins for the following prototypes:

unsigned long long __builtin_cntlzdm (unsigned long long, unsigned long long)
unsigned long long __builtin_cnttzdm (unsigned long long, unsigned long long)
vector unsigned long long vec_cntlzm (vector unsigned long long, vector unsigned long long)
vector unsigned long long vec_cnttzm (vector unsigned long long, vector unsigned long long)

Differential Revision: https://reviews.llvm.org/D80941
---
Reviewer notes (commentary only; text between "---" and the first
"diff --git" is not part of the commit):

  * Layering, as wired up by the hunks below:
      __builtin_cntlzdm        -> llvm.ppc.cntlzdm        -> CNTLZDM (XForm_6<31, 59>)
      __builtin_cnttzdm        -> llvm.ppc.cnttzdm        -> CNTTZDM (XForm_6<31, 571>)
      vec_cntlzm
        -> __builtin_altivec_vclzdm -> llvm.ppc.altivec.vclzdm -> VCLZDM (VXForm_1<1924>)
      vec_cnttzm
        -> __builtin_altivec_vctzdm -> llvm.ppc.altivec.vctzdm -> VCTZDM (VXForm_1<1988>)
  * All four intrinsics are declared [IntrNoMem]; the instruction defs sit
    inside the existing "let Predicates = [IsISA3_1]" block.
  * The "# CHECK xxgenpcvbm" line in p10insts.txt (no colon) is an unchanged
    context line of the target file, not something this patch introduces.
  * NOTE(review): the line structure of this patch was restored from a copy
    in which whitespace had been collapsed. Runs of spaces inside hunk lines
    are a best-effort reconstruction; if a hunk is rejected, retry with
    "git apply --ignore-whitespace".

 clang/include/clang/Basic/BuiltinsPPC.def     |  6 +++
 clang/lib/Headers/altivec.h                   | 15 +++++++
 clang/test/CodeGen/builtins-ppc-p10.c         | 10 +++++
 clang/test/CodeGen/builtins-ppc-p10vector.c   | 12 +++++
 llvm/include/llvm/IR/IntrinsicsPowerPC.td     | 16 +++++++
 llvm/lib/Target/PowerPC/PPCInstrPrefix.td     | 16 +++++++
 .../test/CodeGen/PowerPC/p10-bit-manip-ops.ll | 44 +++++++++++++++++++
 .../test/MC/Disassembler/PowerPC/p10insts.txt | 12 +++++
 llvm/test/MC/PowerPC/p10.s                    | 12 +++++
 9 files changed, 143 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index d16c2d239064..5bc41c9d6cea 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -306,6 +306,10 @@ BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vclrlb, "V16cV16cUi", "")
 BUILTIN(__builtin_altivec_vclrrb, "V16cV16cUi", "")
 
+// P10 Vector Count Leading / Trailing Zeroes under bit Mask built-ins.
+BUILTIN(__builtin_altivec_vclzdm, "V2ULLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vctzdm, "V2ULLiV2ULLiV2ULLi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
@@ -485,6 +489,8 @@ BUILTIN(__builtin_divdeu, "ULLiULLiULLi", "")
 BUILTIN(__builtin_bpermd, "SLLiSLLiSLLi", "")
 BUILTIN(__builtin_pdepd, "ULLiULLiULLi", "")
 BUILTIN(__builtin_pextd, "ULLiULLiULLi", "")
+BUILTIN(__builtin_cntlzdm, "ULLiULLiULLi", "")
+BUILTIN(__builtin_cnttzdm, "ULLiULLiULLi", "")
 
 // Vector int128 (un)pack
 BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index d9ad3e331c5c..f9fd3e2e50eb 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16830,6 +16830,21 @@ vec_clrr(vector unsigned char __a, unsigned int __n) {
   return __builtin_altivec_vclrrb((vector signed char)__a, __n);
 #endif
 }
+
+/* vec_cntlzm */
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_cntlzm(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vclzdm(__a, __b);
+}
+
+/* vec_cnttzm */
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_cnttzm(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vctzdm(__a, __b);
+}
+
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
diff --git a/clang/test/CodeGen/builtins-ppc-p10.c b/clang/test/CodeGen/builtins-ppc-p10.c
index c21e8026d0c9..5776dfae66dc 100644
--- a/clang/test/CodeGen/builtins-ppc-p10.c
+++ b/clang/test/CodeGen/builtins-ppc-p10.c
@@ -13,3 +13,13 @@ unsigned long long test_pextd(void) {
   // CHECK: @llvm.ppc.pextd
   return __builtin_pextd(ulla, ullb);
 }
+
+unsigned long long test_cntlzdm(void) {
+  // CHECK: @llvm.ppc.cntlzdm
+  return __builtin_cntlzdm(ulla, ullb);
+}
+
+unsigned long long test_cnttzdm(void) {
+  // CHECK: @llvm.ppc.cnttzdm
+  return __builtin_cnttzdm(ulla, ullb);
+}
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index be4a1b59bfe5..42c0ed917801 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -79,3 +79,15 @@ vector unsigned char test_vec_clrr_uc(void) {
   // CHECK-LE-NEXT: ret <16 x i8>
   return vec_clrr(vuca, uia);
 }
+
+vector unsigned long long test_vclzdm(void) {
+  // CHECK: @llvm.ppc.altivec.vclzdm(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_cntlzm(vulla, vullb);
+}
+
+vector unsigned long long test_vctzdm(void) {
+  // CHECK: @llvm.ppc.altivec.vctzdm(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_cnttzm(vulla, vullb);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 43bd706cf104..0f8521e5b6c5 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -68,6 +68,14 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
       : GCCBuiltin<"__builtin_pextd">,
         Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
 
+  // Count Leading / Trailing Zeroes under bit Mask Builtins.
+  def int_ppc_cntlzdm
+      : GCCBuiltin<"__builtin_cntlzdm">,
+        Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+  def int_ppc_cnttzdm
+      : GCCBuiltin<"__builtin_cnttzdm">,
+        Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+
   def int_ppc_truncf128_round_to_odd
       : GCCBuiltin<"__builtin_truncf128_round_to_odd">,
         Intrinsic <[llvm_double_ty], [llvm_f128_ty], [IntrNoMem]>;
@@ -673,6 +681,14 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
               Intrinsic<[llvm_v1i128_ty],
                         [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
                         [IntrNoMem]>;
+
+  // P10 Vector Count Leading / Trailing Zeroes under bit Mask Builtins.
+  def int_ppc_altivec_vclzdm : GCCBuiltin<"__builtin_altivec_vclzdm">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_ppc_altivec_vctzdm : GCCBuiltin<"__builtin_altivec_vctzdm">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
 }
 
 def int_ppc_altivec_vsl   : PowerPC_Vec_WWW_Intrinsic<"vsl">;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index ccee15372c69..381da1b87c36 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -532,6 +532,22 @@ let Predicates = [IsISA3_1] in {
    def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
                        "pextd $rA, $rS, $rB", IIC_IntGeneral,
                        [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>;
+   def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vclzdm $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD,
+                               (int_ppc_altivec_vclzdm v2i64:$vA, v2i64:$vB))]>;
+   def VCTZDM : VXForm_1<1988, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vctzdm $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD,
+                               (int_ppc_altivec_vctzdm v2i64:$vA, v2i64:$vB))]>;
+   def CNTLZDM : XForm_6<31, 59, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                         "cntlzdm $rA, $rS, $rB", IIC_IntGeneral,
+                         [(set i64:$rA,
+                               (int_ppc_cntlzdm i64:$rS, i64:$rB))]>;
+   def CNTTZDM : XForm_6<31, 571, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                         "cnttzdm $rA, $rS, $rB", IIC_IntGeneral,
+                         [(set i64:$rA,
+                               (int_ppc_cnttzdm i64:$rS, i64:$rB))]>;
   def XXGENPCVBM :
     XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
                        "xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
diff --git a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
index fc2ebf89079f..d2a00b048446 100644
--- a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
@@ -9,6 +9,10 @@ declare <2 x i64> @llvm.ppc.altivec.vpdepd(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.ppc.altivec.vpextd(<2 x i64>, <2 x i64>)
 declare i64 @llvm.ppc.pdepd(i64, i64)
 declare i64 @llvm.ppc.pextd(i64, i64)
+declare <2 x i64> @llvm.ppc.altivec.vclzdm(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vctzdm(<2 x i64>, <2 x i64>)
+declare i64 @llvm.ppc.cntlzdm(i64, i64)
+declare i64 @llvm.ppc.cnttzdm(i64, i64)
 
 define <2 x i64> @test_vpdepd(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vpdepd:
@@ -49,3 +53,43 @@ entry:
   %tmp = tail call i64 @llvm.ppc.pextd(i64 %a, i64 %b)
   ret i64 %tmp
 }
+
+define <2 x i64> @test_vclzdm(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vclzdm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vclzdm v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vclzdm(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @test_vctzdm(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vctzdm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vctzdm v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vctzdm(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %tmp
+}
+
+define i64 @test_cntlzdm(i64 %a, i64 %b) {
+; CHECK-LABEL: test_cntlzdm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cntlzdm r3, r3, r4
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.cntlzdm(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+define i64 @test_cnttzdm(i64 %a, i64 %b) {
+; CHECK-LABEL: test_cnttzdm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cnttzdm r3, r3, r4
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.cnttzdm(i64 %a, i64 %b)
+  ret i64 %tmp
+}
diff --git a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
index c4e836c57779..a263004413da 100644
--- a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -13,6 +13,18 @@
 # CHECK: pextd 1, 2, 4
 0x7c 0x41 0x21 0x78
 
+# CHECK: vclzdm 1, 2, 3
+0x10 0x22 0x1f 0x84
+
+# CHECK: vctzdm 1, 2, 3
+0x10 0x22 0x1f 0xc4
+
+# CHECK: cntlzdm 1, 3, 2
+0x7c 0x61 0x10 0x76
+
+# CHECK: cnttzdm 1, 3, 2
+0x7c 0x61 0x14 0x76
+
 # CHECK xxgenpcvbm 0, 1, 2
 0xf0 0x02 0x0f 0x28
 
diff --git a/llvm/test/MC/PowerPC/p10.s b/llvm/test/MC/PowerPC/p10.s
index e14f6d7e3dd6..ec2afd8c5062 100644
--- a/llvm/test/MC/PowerPC/p10.s
+++ b/llvm/test/MC/PowerPC/p10.s
@@ -15,6 +15,18 @@
 # CHECK-BE: pextd 1, 2, 4                         # encoding: [0x7c,0x41,0x21,0x78]
 # CHECK-LE: pextd 1, 2, 4                         # encoding: [0x78,0x21,0x41,0x7c]
             pextd 1, 2, 4
+# CHECK-BE: vclzdm 1, 2, 3                        # encoding: [0x10,0x22,0x1f,0x84]
+# CHECK-LE: vclzdm 1, 2, 3                        # encoding: [0x84,0x1f,0x22,0x10]
+            vclzdm 1, 2, 3
+# CHECK-BE: vctzdm 1, 2, 3                        # encoding: [0x10,0x22,0x1f,0xc4]
+# CHECK-LE: vctzdm 1, 2, 3                        # encoding: [0xc4,0x1f,0x22,0x10]
+            vctzdm 1, 2, 3
+# CHECK-BE: cntlzdm 1, 3, 2                       # encoding: [0x7c,0x61,0x10,0x76]
+# CHECK-LE: cntlzdm 1, 3, 2                       # encoding: [0x76,0x10,0x61,0x7c]
+            cntlzdm 1, 3, 2
+# CHECK-BE: cnttzdm 1, 3, 2                       # encoding: [0x7c,0x61,0x14,0x76]
+# CHECK-LE: cnttzdm 1, 3, 2                       # encoding: [0x76,0x14,0x61,0x7c]
+            cnttzdm 1, 3, 2
 # CHECK-BE: xxgenpcvbm 0, 1, 2                    # encoding: [0xf0,0x02,0x0f,0x28]
 # CHECK-LE: xxgenpcvbm 0, 1, 2                    # encoding: [0x28,0x0f,0x02,0xf0]
             xxgenpcvbm 0, 1, 2