Added builtins for multiprecision adds.

We lower all of these intrinsics into a 2x chained usage of
uadd.with.overflow.

llvm-svn: 172341
This commit is contained in:
Michael Gottesman 2013-01-13 02:22:39 +00:00
parent 1f5a4323e9
commit 54398015bf
3 changed files with 150 additions and 0 deletions

View File

@ -925,5 +925,11 @@ LIBBUILTIN(_Block_object_dispose, "vvC*iC", "f", "Blocks.h", ALL_LANGUAGES)
// Annotation function
BUILTIN(__builtin_annotation, "v.", "tn")
// Multiprecision Arithmetic Builtins.
BUILTIN(__builtin_addcs, "UsUsCUsCUsCUs*", "n")
BUILTIN(__builtin_addc, "UiUiCUiCUiCUi*", "n")
BUILTIN(__builtin_addcl, "ULiULiCULiCULiCULi*", "n")
BUILTIN(__builtin_addcll, "ULLiULLiCULLiCULLiCULLi*", "n")
#undef BUILTIN
#undef LIBBUILTIN

View File

@ -169,6 +169,31 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
}
/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
const llvm::Intrinsic::ID IntrinsicID,
llvm::Value *X, llvm::Value *Y,
llvm::Value *&Carry) {
// Make sure we have integers of the same width.
assert(X->getType() == Y->getType() &&
"Arguments must be the same type. (Did you forget to make sure both "
"arguments have the same integer width?)");
ArrayRef<llvm::Type *> Type(X->getType());
llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, Type);
llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
return CGF.Builder.CreateExtractValue(Tmp, 0);
}
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
unsigned BuiltinID, const CallExpr *E) {
// See if we can constant fold this builtin. If so, don't emit it at all.
@ -1321,6 +1346,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
}
case Builtin::BI__builtin_addcs:
case Builtin::BI__builtin_addc:
case Builtin::BI__builtin_addcl:
case Builtin::BI__builtin_addcll: {
// We translate all of these builtins from expressions of the form:
// int x = ..., y = ..., carryin = ..., carryout, result;
// result = __builtin_addc(x, y, carryin, &carryout);
//
// to LLVM IR of the form:
//
// %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
// %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
// %carry1 = extractvalue {i32, i1} %tmp1, 1
// %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
// i32 %carryin)
// %result = extractvalue {i32, i1} %tmp2, 0
// %carry2 = extractvalue {i32, i1} %tmp2, 1
// %tmp3 = or i1 %carry1, %carry2
// %tmp4 = zext i1 %tmp3 to i32
// store i32 %tmp4, i32* %carryout
// Scalarize our inputs.
llvm::Value *X = EmitScalarExpr(E->getArg(0));
llvm::Value *Y = EmitScalarExpr(E->getArg(1));
llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
std::pair<llvm::Value*, unsigned> CarryOutPtr =
EmitPointerWithAlignment(E->getArg(3));
const llvm::Intrinsic::ID IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
// Construct our resulting LLVM IR expression.
llvm::Value *Carry1;
llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
X, Y, Carry1);
llvm::Value *Carry2;
llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
Sum1, Carryin, Carry2);
llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
X->getType());
llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
CarryOutPtr.first);
CarryOutStore->setAlignment(CarryOutPtr.second);
return RValue::get(Sum2);
}
case Builtin::BI__noop:
return RValue::get(0);
}

View File

@ -0,0 +1,74 @@
// RUN: %clang_cc1 -emit-llvm -x c %s -o - -O3 | FileCheck %s
unsigned short test_addcs(unsigned short x, unsigned short y,
unsigned short carryin, unsigned short *z) {
// CHECK: @test_addcs
// CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
// CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %{{.+}}, i16 %carryin)
// CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
// CHECK: %{{.+}} = zext i1 %{{.+}} to i16
// CHECK: store i16 %{{.+}}, i16* %z, align 2
unsigned short carryout;
*z = __builtin_addcs(x, y, carryin, &carryout);
return carryout;
}
unsigned test_addc(unsigned x, unsigned y, unsigned carryin, unsigned *z) {
// CHECK: @test_addc
// CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
// CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %carryin)
// CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
// CHECK: %{{.+}} = zext i1 %{{.+}} to i32
// CHECK: store i32 %{{.+}}, i32* %z, align 4
unsigned carryout;
*z = __builtin_addc(x, y, carryin, &carryout);
return carryout;
}
unsigned long test_addcl(unsigned long x, unsigned long y,
unsigned long carryin, unsigned long *z) {
// CHECK: @test_addcl
// CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin)
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
// CHECK: %{{.+}} = zext i1 %{{.+}} to i64
// CHECK: store i64 %{{.+}}, i64* %z, align 8
unsigned long carryout;
*z = __builtin_addcl(x, y, carryin, &carryout);
return carryout;
}
unsigned long long test_addcll(unsigned long long x, unsigned long long y,
unsigned long long carryin,
unsigned long long *z) {
// CHECK: @test_addcll
// CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin)
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
// CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
// CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
// CHECK: %{{.+}} = zext i1 %{{.+}} to i64
// CHECK: store i64 %{{.+}}, i64* %z, align 8
unsigned long long carryout;
*z = __builtin_addcll(x, y, carryin, &carryout);
return carryout;
}