From accb65b994074caba156d35ead9c7b50f6c320ac Mon Sep 17 00:00:00 2001 From: QingShan Zhang Date: Thu, 20 Sep 2018 05:04:57 +0000 Subject: [PATCH] [PowerPC] [Clang] Add vector int128 pack/unpack builtins unsigned long long __builtin_unpack_vector_int128 (vector __int128_t, int); vector __int128_t __builtin_pack_vector_int128 (unsigned long long, unsigned long long); Builtins should behave the same way as in GCC. Patch By: wuzish (Zixuan Wu) Differential Revision: https://reviews.llvm.org/D52074 llvm-svn: 342614 --- clang/include/clang/Basic/BuiltinsPPC.def | 4 +++ clang/lib/CodeGen/CGBuiltin.cpp | 22 ++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 12 ++++++++ clang/test/CodeGen/builtins-ppc-error.c | 6 ++++ clang/test/CodeGen/builtins-ppc-p7-disabled.c | 8 +++++ clang/test/CodeGen/builtins-ppc-vsx.c | 29 +++++++++++++++++++ 6 files changed, 81 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 9be760853d50..6791e188bfc5 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -470,6 +470,10 @@ BUILTIN(__builtin_divde, "SLLiSLLiSLLi", "") BUILTIN(__builtin_divdeu, "ULLiULLiULLi", "") BUILTIN(__builtin_bpermd, "SLLiSLLiSLLi", "") +// Vector int128 (un)pack +BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "") +BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "") + // FIXME: Obviously incomplete. 
#undef BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 90505ac088d8..23c760207b84 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11233,6 +11233,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } + + case PPC::BI__builtin_pack_vector_int128: { + bool isLittleEndian = getTarget().isLittleEndian(); + Value *UndefValue = + llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2)); + Value *Res = Builder.CreateInsertElement( + UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0)); + Res = Builder.CreateInsertElement(Res, Ops[1], + (uint64_t)(isLittleEndian ? 0 : 1)); + return Builder.CreateBitCast(Res, ConvertType(E->getType())); + } + + case PPC::BI__builtin_unpack_vector_int128: { + ConstantInt *Index = cast<ConstantInt>(Ops[1]); + Value *Unpacked = Builder.CreateBitCast( + Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2)); + + if (getTarget().isLittleEndian()) + Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); + + return Builder.CreateExtractElement(Unpacked, Index); + } } } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4c504af7a08e..a1bfff4fb75a 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2970,6 +2970,13 @@ bool Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return Diag(TheCall->getBeginLoc(), diag::err_ppc_builtin_only_on_pwr7) << TheCall->getSourceRange(); + auto SemaVSXCheck = [&](CallExpr *TheCall) -> bool { + if (!Context.getTargetInfo().hasFeature("vsx")) + return Diag(TheCall->getBeginLoc(), diag::err_ppc_builtin_only_on_pwr7) + << TheCall->getSourceRange(); + return false; + }; + switch (BuiltinID) { default: return false; case PPC::BI__builtin_altivec_crypto_vshasigmaw: @@ -2988,6 +2995,11 @@ bool 
Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case PPC::BI__builtin_vsx_xxpermdi: case PPC::BI__builtin_vsx_xxsldwi: return SemaBuiltinVSX(TheCall); + case PPC::BI__builtin_unpack_vector_int128: + return SemaVSXCheck(TheCall) || + SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case PPC::BI__builtin_pack_vector_int128: + return SemaVSXCheck(TheCall); } return SemaBuiltinConstantArgRange(TheCall, i, l, u); } diff --git a/clang/test/CodeGen/builtins-ppc-error.c b/clang/test/CodeGen/builtins-ppc-error.c index 29eebf286100..a50dc2cba024 100644 --- a/clang/test/CodeGen/builtins-ppc-error.c +++ b/clang/test/CodeGen/builtins-ppc-error.c @@ -14,6 +14,7 @@ extern vector signed int vsi; extern vector signed int vui; extern vector float vf; extern vector unsigned char vuc; +extern vector signed __int128 vsllli; void testInsertWord(void) { int index = 5; @@ -67,3 +68,8 @@ void testCTU(int index) { void testVCTUXS(int index) { vec_vctuxs(vf, index); //expected-error {{argument to '__builtin_altivec_vctuxs' must be a constant integer}} } + +void testUnpack128(int index) { + __builtin_unpack_vector_int128(vsllli, index); //expected-error {{argument to '__builtin_unpack_vector_int128' must be a constant integer}} + __builtin_unpack_vector_int128(vsllli, 5); //expected-error {{argument value 5 is outside the valid range [0, 1]}} +} diff --git a/clang/test/CodeGen/builtins-ppc-p7-disabled.c b/clang/test/CodeGen/builtins-ppc-p7-disabled.c index 9ccedd2ce894..2a6a4f8dbc8e 100644 --- a/clang/test/CodeGen/builtins-ppc-p7-disabled.c +++ b/clang/test/CodeGen/builtins-ppc-p7-disabled.c @@ -6,6 +6,8 @@ // RUN: not %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - 2>&1 \ // RUN: -target-cpu pwr7 | FileCheck %s -check-prefix=CHECK-32 +vector signed __int128 vslll = {33}; + void call_p7_builtins(void) { int a = __builtin_divwe(33, 11); @@ -13,6 +15,8 @@ void call_p7_builtins(void) unsigned long long d = __builtin_divde(33ULL, 11ULL); unsigned long long 
e = __builtin_divdeu(33ULL, 11ULL); unsigned long long f = __builtin_bpermd(33ULL, 11ULL); + __builtin_pack_vector_int128(33ULL, 11ULL); + __builtin_unpack_vector_int128(vslll, 1); } // CHECK: error: this builtin is only valid on POWER7 or later CPUs @@ -25,6 +29,10 @@ void call_p7_builtins(void) // CHECK: __builtin_divdeu // CHECK: error: this builtin is only valid on POWER7 or later CPUs // CHECK: __builtin_bpermd +// CHECK: error: this builtin is only valid on POWER7 or later CPUs +// CHECK: __builtin_pack_vector_int128 +// CHECK: error: this builtin is only valid on POWER7 or later CPUs +// CHECK: __builtin_unpack_vector_int128 // CHECK-32: error: this builtin is only available on 64-bit targets // CHECK-32: __builtin_divde // CHECK-32: error: this builtin is only available on 64-bit targets diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c index 848d24d4fb10..c19140b9464e 100644 --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -49,6 +49,7 @@ vector unsigned int res_vui; vector bool long long res_vbll; vector signed long long res_vsll; vector unsigned long long res_vull; +vector signed __int128 res_vslll; double res_d; float res_af[4]; @@ -1803,3 +1804,31 @@ vector double xxsldwi_should_not_assert(vector double a, vector double b) { // CHECK-NEXT-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> // CHECK-NEXT-LE: bitcast <4 x i32> %{{[0-9]+}} to <2 x double> } + +void testVectorInt128Pack(){ +// CHECK-LABEL: testVectorInt128Pack +// CHECK-LABEL-LE: testVectorInt128Pack + res_vslll = __builtin_pack_vector_int128(aull[0], aull[1]); +// CHECK: %[[V1:[0-9]+]] = insertelement <2 x i64> undef, i64 %{{[0-9]+}}, i64 0 +// CHECK-NEXT: %[[V2:[0-9]+]] = insertelement <2 x i64> %[[V1]], i64 %{{[0-9]+}}, i64 1 +// CHECK-NEXT: bitcast <2 x i64> %[[V2]] to <1 x i128> + +// CHECK-LE: %[[V1:[0-9]+]] = insertelement <2 x i64> undef, i64 %{{[0-9]+}}, i64 1 +// CHECK-NEXT-LE: 
%[[V2:[0-9]+]] = insertelement <2 x i64> %[[V1]], i64 %{{[0-9]+}}, i64 0 +// CHECK-NEXT-LE: bitcast <2 x i64> %[[V2]] to <1 x i128> + + __builtin_unpack_vector_int128(res_vslll, 0); +// CHECK: %[[V1:[0-9]+]] = bitcast <1 x i128> %{{[0-9]+}} to <2 x i64> +// CHECK-NEXT: %{{[0-9]+}} = extractelement <2 x i64> %[[V1]], i32 0 + +// CHECK-LE: %[[V1:[0-9]+]] = bitcast <1 x i128> %{{[0-9]+}} to <2 x i64> +// CHECK-NEXT-LE: %{{[0-9]+}} = extractelement <2 x i64> %[[V1]], i32 1 + + __builtin_unpack_vector_int128(res_vslll, 1); +// CHECK: %[[V1:[0-9]+]] = bitcast <1 x i128> %{{[0-9]+}} to <2 x i64> +// CHECK-NEXT: %{{[0-9]+}} = extractelement <2 x i64> %[[V1]], i32 1 + +// CHECK-LE: %[[V1:[0-9]+]] = bitcast <1 x i128> %{{[0-9]+}} to <2 x i64> +// CHECK-NEXT-LE: %{{[0-9]+}} = extractelement <2 x i64> %[[V1]], i32 0 + +}