From 54398015bf8cbdc3af54dda74807d6f3c8436164 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Sun, 13 Jan 2013 02:22:39 +0000
Subject: [PATCH] Added builtins for multiprecision adds.

We lower each of these builtins into two chained calls to
llvm.uadd.with.overflow.

llvm-svn: 172341
---
 clang/include/clang/Basic/Builtins.def       |  6 ++
 clang/lib/CodeGen/CGBuiltin.cpp              | 70 ++++++++++++++++++
 clang/test/CodeGen/builtins-multipercision.c | 74 ++++++++++++++++++++
 3 files changed, 150 insertions(+)
 create mode 100644 clang/test/CodeGen/builtins-multipercision.c

diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index b5f1c860d4c3..6d8afefb0449 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -925,5 +925,11 @@ LIBBUILTIN(_Block_object_dispose, "vvC*iC", "f", "Blocks.h", ALL_LANGUAGES)
 // Annotation function
 BUILTIN(__builtin_annotation, "v.", "tn")
 
+// Multiprecision Arithmetic Builtins.
+BUILTIN(__builtin_addcs, "UsUsCUsCUsCUs*", "n")
+BUILTIN(__builtin_addc, "UiUiCUiCUiCUi*", "n")
+BUILTIN(__builtin_addcl, "ULiULiCULiCULiCULi*", "n")
+BUILTIN(__builtin_addcll, "ULLiULLiCULLiCULLiCULLi*", "n")
+
 #undef BUILTIN
 #undef LIBBUILTIN
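Note (not part of the patch): in Builtins.def signature strings, "U" marks an
unsigned type, "s"/"i"/"Li"/"LLi" select short/int/long/long long, a trailing
"C" marks a const parameter, and "*" a pointer, so "UiUiCUiCUiCUi*" declares
unsigned __builtin_addc(unsigned x, unsigned y, unsigned carryin,
unsigned *carryout). A minimal C sketch of the carry-chaining idiom these
builtins enable (limb layout and names are illustrative):

    #include <stdio.h>

    /* Add two 64-bit values held as pairs of 32-bit limbs (least
       significant limb first), chaining the carry between limbs. */
    static void add2(unsigned lo1, unsigned hi1, unsigned lo2, unsigned hi2,
                     unsigned *lo, unsigned *hi) {
      unsigned carry1, carry2;
      *lo = __builtin_addc(lo1, lo2, 0, &carry1);      /* carry in = 0 */
      *hi = __builtin_addc(hi1, hi2, carry1, &carry2); /* propagate it */
      (void)carry2; /* would feed the next limb of a wider number */
    }

    int main(void) {
      unsigned lo, hi;
      add2(0xffffffffu, 0x0u, 0x1u, 0x0u, &lo, &hi);
      printf("0x%08x%08x\n", hi, lo); /* expect 0x0000000100000000 */
      return 0;
    }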
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 55f2757ccd6d..9badeaf613c3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -169,6 +169,31 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                       ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
 }
 
+/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
+/// depending on IntrinsicID.
+///
+/// \arg CGF The current codegen function.
+/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
+/// \arg X The first argument to the llvm.*.with.overflow.*.
+/// \arg Y The second argument to the llvm.*.with.overflow.*.
+/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
+/// \returns The result (i.e. sum/product) returned by the intrinsic.
+static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
+                                          const llvm::Intrinsic::ID IntrinsicID,
+                                          llvm::Value *X, llvm::Value *Y,
+                                          llvm::Value *&Carry) {
+  // Make sure we have integers of the same width.
+  assert(X->getType() == Y->getType() &&
+         "Arguments must be the same type. (Did you forget to make sure both "
+         "arguments have the same integer width?)");
+
+  ArrayRef<llvm::Type *> Type(X->getType());
+  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, Type);
+  llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
+  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
+  return CGF.Builder.CreateExtractValue(Tmp, 0);
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                         unsigned BuiltinID, const CallExpr *E) {
   // See if we can constant fold this builtin. If so, don't emit it at all.
@@ -1321,6 +1346,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
   }
+  case Builtin::BI__builtin_addcs:
+  case Builtin::BI__builtin_addc:
+  case Builtin::BI__builtin_addcl:
+  case Builtin::BI__builtin_addcll: {
+
+    // We translate all of these builtins from expressions of the form:
+    //   int x = ..., y = ..., carryin = ..., carryout, result;
+    //   result = __builtin_addc(x, y, carryin, &carryout);
+    //
+    // to LLVM IR of the form:
+    //
+    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
+    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
+    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
+    //                                                       i32 %carryin)
+    //   %result = extractvalue {i32, i1} %tmp2, 0
+    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
+    //   %tmp3 = or i1 %carry1, %carry2
+    //   %tmp4 = zext i1 %tmp3 to i32
+    //   store i32 %tmp4, i32* %carryout
+
+    // Scalarize our inputs.
+    llvm::Value *X = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
+    std::pair<llvm::Value*, unsigned> CarryOutPtr =
+      EmitPointerWithAlignment(E->getArg(3));
+
+    const llvm::Intrinsic::ID IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
+
+    // Construct our resulting LLVM IR expression.
+    llvm::Value *Carry1;
+    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
+                                              X, Y, Carry1);
+    llvm::Value *Carry2;
+    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
+                                              Sum1, Carryin, Carry2);
+    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
+                                               X->getType());
+    llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
+                                                         CarryOutPtr.first);
+    CarryOutStore->setAlignment(CarryOutPtr.second);
+    return RValue::get(Sum2);
+  }
   case Builtin::BI__noop:
     return RValue::get(0);
 }
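Note (not part of the patch): a portable-C equivalent of the emitted IR makes
the carry logic easier to audit. Each unsigned comparison below recovers the
carry bit of the preceding wrapping add, standing in for one
llvm.uadd.with.overflow call. For a carry-in of 0 or 1, the second add can
only overflow when the first one did not, so at most one of the two carry
bits is ever set and OR-ing them is exact:

    /* Reference semantics for __builtin_addc (32-bit case). */
    static unsigned addc_reference(unsigned x, unsigned y, unsigned carryin,
                                   unsigned *carryout) {
      unsigned sum1 = x + y;          /* first uadd.with.overflow    */
      unsigned carry1 = sum1 < x;     /* its carry bit               */
      unsigned sum2 = sum1 + carryin; /* second uadd.with.overflow   */
      unsigned carry2 = sum2 < sum1;  /* its carry bit               */
      *carryout = carry1 | carry2;    /* or + zext + store in the IR */
      return sum2;
    }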
diff --git a/clang/test/CodeGen/builtins-multipercision.c b/clang/test/CodeGen/builtins-multipercision.c
new file mode 100644
index 000000000000..d49d0fd70232
--- /dev/null
+++ b/clang/test/CodeGen/builtins-multipercision.c
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -emit-llvm -x c %s -o - -O3 | FileCheck %s
+
+unsigned short test_addcs(unsigned short x, unsigned short y,
+                          unsigned short carryin, unsigned short *z) {
+  // CHECK: @test_addcs
+  // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %{{.+}}, i16 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i16
+  // CHECK: store i16 %{{.+}}, i16* %z, align 2
+
+  unsigned short carryout;
+  *z = __builtin_addcs(x, y, carryin, &carryout);
+
+  return carryout;
+}
+
+unsigned test_addc(unsigned x, unsigned y, unsigned carryin, unsigned *z) {
+  // CHECK: @test_addc
+  // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i32
+  // CHECK: store i32 %{{.+}}, i32* %z, align 4
+  unsigned carryout;
+  *z = __builtin_addc(x, y, carryin, &carryout);
+
+  return carryout;
+}
+
+unsigned long test_addcl(unsigned long x, unsigned long y,
+                         unsigned long carryin, unsigned long *z) {
+  // CHECK: @test_addcl
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i64
+  // CHECK: store i64 %{{.+}}, i64* %z, align 8
+  unsigned long carryout;
+  *z = __builtin_addcl(x, y, carryin, &carryout);
+
+  return carryout;
+}
+
+unsigned long long test_addcll(unsigned long long x, unsigned long long y,
+                               unsigned long long carryin,
+                               unsigned long long *z) {
+  // CHECK: @test_addcll
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i64
+  // CHECK: store i64 %{{.+}}, i64* %z, align 8
+  unsigned long long carryout;
+  *z = __builtin_addcll(x, y, carryin, &carryout);
+
+  return carryout;
+}
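Note (not part of the patch): the lit test above only checks the shape of the
emitted IR. A quick runtime sanity check, assuming a host compiler with the
unsigned __int128 extension, is to compare a two-limb __builtin_addcll chain
against native 128-bit arithmetic:

    #include <assert.h>

    int main(void) {
      unsigned long long lo = 0xffffffffffffffffULL, hi = 0x1ULL;
      unsigned long long addend = 0x2ULL, c0, c1;

      /* Two-limb add via the builtin, carry chained through c0. */
      unsigned long long rlo = __builtin_addcll(lo, addend, 0, &c0);
      unsigned long long rhi = __builtin_addcll(hi, 0, c0, &c1);

      /* The same addition in native 128-bit arithmetic. */
      unsigned __int128 r = (((unsigned __int128)hi << 64) | lo) + addend;
      assert(rlo == (unsigned long long)r);
      assert(rhi == (unsigned long long)(r >> 64));
      return 0;
    }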