forked from OSchip/llvm-project
Added builtins for multiprecision adds.

We lower each of these builtins into two chained calls to llvm.uadd.with.overflow.

llvm-svn: 172341
commit 54398015bf
parent 1f5a4323e9
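For orientation, a sketch of how the new builtins are meant to be used (this example is ours, not part of the commit; the helper name is illustrative). Each call adds two limbs plus an incoming carry and writes the outgoing carry through the pointer argument, so an n-limb multiprecision add becomes a simple loop:

/* Illustrative only: add two n-limb unsigned numbers, threading the
   carry from one limb to the next via the new builtin. */
void multiword_add(unsigned *sum, const unsigned *x, const unsigned *y,
                   unsigned n) {
  unsigned carry = 0;
  for (unsigned i = 0; i < n; ++i)
    sum[i] = __builtin_addc(x[i], y[i], carry, &carry);
}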
clang/include/clang/Basic/Builtins.def
@@ -925,5 +925,11 @@ LIBBUILTIN(_Block_object_dispose, "vvC*iC", "f", "Blocks.h", ALL_LANGUAGES)
 // Annotation function
 BUILTIN(__builtin_annotation, "v.", "tn")
 
+// Multiprecision Arithmetic Builtins.
+BUILTIN(__builtin_addcs, "UsUsCUsCUsCUs*", "n")
+BUILTIN(__builtin_addc, "UiUiCUiCUiCUi*", "n")
+BUILTIN(__builtin_addcl, "ULiULiCULiCULiCULi*", "n")
+BUILTIN(__builtin_addcll, "ULLiULLiCULLiCULLiCULLi*", "n")
+
#undef BUILTIN
#undef LIBBUILTIN
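Decoded by Builtins.def's type-string conventions (the first entry is the return type; 'U' = unsigned, 's' = short, 'i' = int, 'Li' = long, 'LLi' = long long, a trailing 'C' = const, '*' = pointer; the "n" attribute marks the builtin nothrow), the four signatures correspond to prototypes of the following shape. Parameter names are ours, and the meaningless top-level const on the by-value parameters is dropped:

unsigned short     __builtin_addcs (unsigned short x, unsigned short y,
                                    unsigned short carryin,
                                    unsigned short *carryout);
unsigned           __builtin_addc  (unsigned x, unsigned y,
                                    unsigned carryin, unsigned *carryout);
unsigned long      __builtin_addcl (unsigned long x, unsigned long y,
                                    unsigned long carryin,
                                    unsigned long *carryout);
unsigned long long __builtin_addcll(unsigned long long x,
                                    unsigned long long y,
                                    unsigned long long carryin,
                                    unsigned long long *carryout);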
clang/lib/CodeGen/CGBuiltin.cpp
@@ -169,6 +169,31 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                       ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
 }
 
+/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
+/// depending on IntrinsicID.
+///
+/// \arg CGF The current codegen function.
+/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
+/// \arg X The first argument to the llvm.*.with.overflow.*.
+/// \arg Y The second argument to the llvm.*.with.overflow.*.
+/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
+/// \returns The result (i.e. sum/product) returned by the intrinsic.
+static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
+                                          const llvm::Intrinsic::ID IntrinsicID,
+                                          llvm::Value *X, llvm::Value *Y,
+                                          llvm::Value *&Carry) {
+  // Make sure we have integers of the same width.
+  assert(X->getType() == Y->getType() &&
+         "Arguments must be the same type. (Did you forget to make sure both "
+         "arguments have the same integer width?)");
+
+  ArrayRef<llvm::Type *> Type(X->getType());
+  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, Type);
+  llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
+  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
+  return CGF.Builder.CreateExtractValue(Tmp, 0);
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                         unsigned BuiltinID, const CallExpr *E) {
   // See if we can constant fold this builtin. If so, don't emit it at all.
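Modeled in plain C (our sketch, not part of the commit): llvm.uadd.with.overflow returns a {sum, overflow} pair, and the helper above splits that pair into its return value and the Carry out-parameter. Semantically, one such call computes:

/* Illustrative C model of a single llvm.uadd.with.overflow.i32 call. */
static unsigned uadd_with_overflow_model(unsigned x, unsigned y,
                                         _Bool *carry) {
  unsigned sum = x + y; /* unsigned add wraps modulo 2^32 */
  *carry = sum < x;     /* it wrapped iff the result is below an operand */
  return sum;
}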
@@ -1321,6 +1346,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
   }
+  case Builtin::BI__builtin_addcs:
+  case Builtin::BI__builtin_addc:
+  case Builtin::BI__builtin_addcl:
+  case Builtin::BI__builtin_addcll: {
+
+    // We translate all of these builtins from expressions of the form:
+    //   int x = ..., y = ..., carryin = ..., carryout, result;
+    //   result = __builtin_addc(x, y, carryin, &carryout);
+    //
+    // to LLVM IR of the form:
+    //
+    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
+    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
+    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
+    //                                                       i32 %carryin)
+    //   %result = extractvalue {i32, i1} %tmp2, 0
+    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
+    //   %tmp3 = or i1 %carry1, %carry2
+    //   %tmp4 = zext i1 %tmp3 to i32
+    //   store i32 %tmp4, i32* %carryout
+
+    // Scalarize our inputs.
+    llvm::Value *X = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
+    std::pair<llvm::Value*, unsigned> CarryOutPtr =
+      EmitPointerWithAlignment(E->getArg(3));
+
+    const llvm::Intrinsic::ID IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
+
+    // Construct our resulting LLVM IR expression.
+    llvm::Value *Carry1;
+    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
+                                              X, Y, Carry1);
+    llvm::Value *Carry2;
+    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
+                                              Sum1, Carryin, Carry2);
+    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
+                                               X->getType());
+    llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
+                                                         CarryOutPtr.first);
+    CarryOutStore->setAlignment(CarryOutPtr.second);
+    return RValue::get(Sum2);
+  }
   case Builtin::BI__noop:
     return RValue::get(0);
   }
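Put together, the emitted IR computes the following, shown here as a self-contained C model (names are ours, for illustration):

/* C model of the __builtin_addc lowering: two chained overflow adds
   whose carry bits are OR'ed, zero-extended, and stored. */
unsigned addc_model(unsigned x, unsigned y, unsigned carryin,
                    unsigned *carryout) {
  unsigned sum1 = x + y;          /* first uadd.with.overflow */
  _Bool carry1 = sum1 < x;
  unsigned sum2 = sum1 + carryin; /* second uadd.with.overflow */
  _Bool carry2 = sum2 < sum1;
  *carryout = (unsigned)(carry1 | carry2); /* or i1 + zext + store */
  return sum2;
}

When carryin is 0 or 1, as in a limb-by-limb loop, at most one of the two adds can wrap, so OR-ing the two flags loses nothing.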
clang/test/CodeGen/builtins-multiprecision.c (new file)
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -emit-llvm -x c %s -o - -O3 | FileCheck %s
+
+unsigned short test_addcs(unsigned short x, unsigned short y,
+                          unsigned short carryin, unsigned short *z) {
+  // CHECK: @test_addcs
+  // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %{{.+}}, i16 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i16
+  // CHECK: store i16 %{{.+}}, i16* %z, align 2
+
+  unsigned short carryout;
+  *z = __builtin_addcs(x, y, carryin, &carryout);
+
+  return carryout;
+}
+
+unsigned test_addc(unsigned x, unsigned y, unsigned carryin, unsigned *z) {
+  // CHECK: @test_addc
+  // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i32
+  // CHECK: store i32 %{{.+}}, i32* %z, align 4
+  unsigned carryout;
+  *z = __builtin_addc(x, y, carryin, &carryout);
+
+  return carryout;
+}
+
+unsigned long test_addcl(unsigned long x, unsigned long y,
+                         unsigned long carryin, unsigned long *z) {
+  // CHECK: @test_addcl
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i64
+  // CHECK: store i64 %{{.+}}, i64* %z, align 8
+  unsigned long carryout;
+  *z = __builtin_addcl(x, y, carryin, &carryout);
+
+  return carryout;
+}
+
+unsigned long long test_addcll(unsigned long long x, unsigned long long y,
+                               unsigned long long carryin,
+                               unsigned long long *z) {
+  // CHECK: @test_addcll
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin)
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1
+  // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0
+  // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}}
+  // CHECK: %{{.+}} = zext i1 %{{.+}} to i64
+  // CHECK: store i64 %{{.+}}, i64* %z, align 8
+  unsigned long long carryout;
+  *z = __builtin_addcll(x, y, carryin, &carryout);
+
+  return carryout;
+}