From 7216f17653631046264b6741982afe8ee7f5e4d8 Mon Sep 17 00:00:00 2001 From: Albert Gutowski Date: Mon, 10 Oct 2016 18:09:27 +0000 Subject: [PATCH] Implement __emul, __emulu, _mul128 and _umul128 MS intrinsics Reviewers: rnk, thakis, majnemer, hans Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D25353 llvm-svn: 283785 --- clang/include/clang/Basic/BuiltinsX86.def | 3 + clang/include/clang/Basic/BuiltinsX86_64.def | 2 + clang/lib/CodeGen/CGBuiltin.cpp | 29 +++++--- clang/lib/Headers/intrin.h | 38 +++------- clang/test/CodeGen/ms-intrinsics.c | 26 ------- clang/test/CodeGen/ms-x86-intrinsics.c | 74 ++++++++++++++++++++ 6 files changed, 110 insertions(+), 62 deletions(-) create mode 100644 clang/test/CodeGen/ms-x86-intrinsics.c diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 666f40e6243f..6154b8cb6ecb 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2071,6 +2071,9 @@ TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "", "") TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx") +TARGET_HEADER_BUILTIN(__emul, "LLiii", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__emulu, "ULLiUiUi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def index d94bedc1984a..8836e68bfdc4 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -20,6 +20,8 @@ TARGET_HEADER_BUILTIN(__mulh, "LLiLLiLLi", "nch", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nch", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_mul128, "LLiLLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_umul128, "ULLiULLiULLiULLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") #undef BUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d05e910f244e..5cbb6a2338f5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7576,16 +7576,24 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); + case X86::BI__emul: + case X86::BI__emulu: { + llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); + bool isSigned = (BuiltinID == X86::BI__emul); + Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); + Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); + return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); + } case X86::BI__mulh: - case X86::BI__umulh: { - Value *LHS = EmitScalarExpr(E->getArg(0)); - Value *RHS = EmitScalarExpr(E->getArg(1)); + case X86::BI__umulh: + case X86::BI_mul128: + case X86::BI_umul128: { llvm::Type *ResType = ConvertType(E->getType()); llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); - bool IsSigned = (BuiltinID == X86::BI__mulh); - LHS = Builder.CreateIntCast(LHS, Int128Ty, IsSigned); - RHS = Builder.CreateIntCast(RHS, Int128Ty, IsSigned); + bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); + Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); + Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); Value *MulResult, *HigherBits; if (IsSigned) { @@ -7595,9 +7603,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, MulResult = Builder.CreateNUWMul(LHS, RHS); HigherBits = Builder.CreateLShr(MulResult, 64); } - HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); - return HigherBits; + + if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) + return HigherBits; + + Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); + Builder.CreateStore(HigherBits, HighBitsAddress); + return Builder.CreateIntCast(MulResult, ResType, IsSigned); } } } diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 3a45ad912212..100da0b0d70c 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -66,7 +66,9 @@ void __cpuid(int[4], int); static __inline__ void __cpuidex(int[4], int, int); void __debugbreak(void); +static __inline__ __int64 __emul(int, int); +static __inline__ unsigned __int64 __emulu(unsigned int, unsigned int); void __cdecl __fastfail(unsigned int); unsigned int __getcallerseflags(void); @@ -313,8 +315,6 @@ unsigned __int64 __lzcnt64(unsigned __int64); static __inline__ void __movsq(unsigned long long *, unsigned long long const *, size_t); static __inline__ -__int64 __mulh(__int64, __int64); -static __inline__ unsigned __int64 __popcnt64(unsigned __int64); static __inline__ unsigned char __readgsbyte(unsigned long); @@ -405,9 +405,6 @@ static __inline__ __int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedXor8_np(char volatile *_Value, char _Mask); -static __inline__ -__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, - __int64 *_HighProduct); unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); __int64 _sarx_i64(__int64, unsigned int); #if __STDC_HOSTED__ @@ -415,34 +412,19 @@ int __cdecl _setjmpex(jmp_buf); #endif unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); -/* - * Multiply two 64-bit integers and obtain a 64-bit result. - * The low-half is returned directly and the high half is in an out parameter. - */ -static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS -_umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand, - unsigned __int64 *_HighProduct) { - unsigned __int128 _FullProduct = - (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; - *_HighProduct = _FullProduct >> 64; - return _FullProduct; -} +static __inline__ +__int64 __mulh(__int64, __int64); static __inline__ unsigned __int64 __umulh(unsigned __int64, unsigned __int64); +static __inline__ +__int64 _mul128(__int64, __int64, __int64*); +static __inline__ +unsigned __int64 _umul128(unsigned __int64, + unsigned __int64, + unsigned __int64*); #endif /* __x86_64__ */ -/*----------------------------------------------------------------------------*\ -|* Multiplication -\*----------------------------------------------------------------------------*/ -static __inline__ __int64 __DEFAULT_FN_ATTRS -__emul(int __in1, int __in2) { - return (__int64)__in1 * (__int64)__in2; -} -static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS -__emulu(unsigned int __in1, unsigned int __in2) { - return (unsigned __int64)__in1 * (unsigned __int64)__in2; -} /*----------------------------------------------------------------------------*\ |* Bit Counting and Testing \*----------------------------------------------------------------------------*/ diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c index d3ca33b6ecc8..a0f27f81055a 100644 --- a/clang/test/CodeGen/ms-intrinsics.c +++ b/clang/test/CodeGen/ms-intrinsics.c @@ -41,32 +41,6 @@ void *test_InterlockedCompareExchangePointer(void * volatile *Destination, // CHECK: ret i8* %[[RESULT:[0-9]+]] // CHECK: } -#if defined(__i386__) -long test__readfsdword(unsigned long Offset) { - return __readfsdword(Offset); -} - -// CHECK-I386: define i32 @test__readfsdword(i32 %Offset){{.*}}{ -// CHECK-I386: [[PTR:%[0-9]+]] = inttoptr i32 %Offset to i32 addrspace(257)* -// CHECK-I386: [[VALUE:%[0-9]+]] = load volatile i32, i32 addrspace(257)* [[PTR]], align 4 -// CHECK-I386: ret i32 [[VALUE:%[0-9]+]] -// CHECK-I386: } -#endif - -#if defined(__x86_64__) -__int64 test__mulh(__int64 a, __int64 b) { - return __mulh(a, b); -} -// CHECK-X64-LABEL: define i64 @test__mulh(i64 %a, i64 %b) -// CHECK-X64: = mul nsw i128 % - -unsigned __int64 test__umulh(unsigned __int64 a, unsigned __int64 b) { - return __umulh(a, b); -} -// CHECK-X64-LABEL: define i64 @test__umulh(i64 %a, i64 %b) -// CHECK-X64: = mul nuw i128 % -#endif - char test_InterlockedExchange8(char volatile *value, char mask) { return _InterlockedExchange8(value, mask); } diff --git a/clang/test/CodeGen/ms-x86-intrinsics.c b/clang/test/CodeGen/ms-x86-intrinsics.c new file mode 100644 index 000000000000..e635220e8c13 --- /dev/null +++ b/clang/test/CodeGen/ms-x86-intrinsics.c @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \ +// RUN: -triple i686--windows -Oz -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-I386 +// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \ +// RUN: -triple x86_64--windows -Oz -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-X64 + +#if defined(__i386__) +long test__readfsdword(unsigned long Offset) { + return __readfsdword(Offset); +} + +// CHECK-I386-LABEL: define i32 @test__readfsdword(i32 %Offset){{.*}}{ +// CHECK-I386: [[PTR:%[0-9]+]] = inttoptr i32 %Offset to i32 addrspace(257)* +// CHECK-I386: [[VALUE:%[0-9]+]] = load volatile i32, i32 addrspace(257)* [[PTR]], align 4 +// CHECK-I386: ret i32 [[VALUE:%[0-9]+]] +// CHECK-I386: } +#endif + +__int64 test__emul(int a, int b) { + return __emul(a, b); +} +// CHECK-LABEL: define i64 @test__emul(i32 %a, i32 %b) +// CHECK: [[X:%[0-9]+]] = sext i32 %a to i64 +// CHECK: [[Y:%[0-9]+]] = sext i32 %b to i64 +// CHECK: [[RES:%[0-9]+]] = mul nsw i64 [[Y]], [[X]] +// CHECK: ret i64 [[RES]] + +unsigned __int64 test__emulu(unsigned int a, unsigned int b) { + return __emulu(a, b); +} +// CHECK-LABEL: define i64 @test__emulu(i32 %a, i32 %b) +// CHECK: [[X:%[0-9]+]] = zext i32 %a to i64 +// CHECK: [[Y:%[0-9]+]] = zext i32 %b to i64 +// CHECK: [[RES:%[0-9]+]] = mul nuw i64 [[Y]], [[X]] +// CHECK: ret i64 [[RES]] + +#if defined(__x86_64__) +__int64 test__mulh(__int64 a, __int64 b) { + return __mulh(a, b); +} +// CHECK-X64-LABEL: define i64 @test__mulh(i64 %a, i64 %b) +// CHECK-X64: = mul nsw i128 % + +unsigned __int64 test__umulh(unsigned __int64 a, unsigned __int64 b) { + return __umulh(a, b); +} +// CHECK-X64-LABEL: define i64 @test__umulh(i64 %a, i64 %b) +// CHECK-X64: = mul nuw i128 % + +__int64 test_mul128(__int64 Multiplier, + __int64 Multiplicand, + __int64 *HighProduct) { + return _mul128(Multiplier, Multiplicand, HighProduct); +} +// CHECK-X64-LABEL: define i64 @test_mul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct) +// CHECK-X64: = sext i64 %Multiplier to i128 +// CHECK-X64: = sext i64 %Multiplicand to i128 +// CHECK-X64: = mul nsw i128 % +// CHECK-X64: store i64 % +// CHECK-X64: ret i64 % + +unsigned __int64 test_umul128(unsigned __int64 Multiplier, + unsigned __int64 Multiplicand, + unsigned __int64 *HighProduct) { + return _umul128(Multiplier, Multiplicand, HighProduct); +} +// CHECK-X64-LABEL: define i64 @test_umul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct) +// CHECK-X64: = zext i64 %Multiplier to i128 +// CHECK-X64: = zext i64 %Multiplicand to i128 +// CHECK-X64: = mul nuw i128 % +// CHECK-X64: store i64 % +// CHECK-X64: ret i64 % +#endif