[Clang][AArch64] Inline assembly support for the ACLE type 'data512_t'
In LLVM IR terms the ACLE type 'data512_t' is essentially an aggregate type { [8 x i64] }. When emitting code for inline assembly operands, clang tries to scalarize aggregate types to an integer of the equivalent length, and otherwise passes them by-reference. This patch adds a target hook that indicates whether a given inline assembly operand is scalarizable, so that clang can emit code to pass/return it by-value.

Differential Revision: https://reviews.llvm.org/D94098
commit 29b263a34f
parent fb09f365ae
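For context, here is roughly what the patch enables at the source level. This is a sketch, not part of the patch: 'data512_t' itself is defined by the ACLE (as a struct wrapping uint64_t[8]); the struct below is a hand-written stand-in mirroring the test at the end of this commit, and the IR in the comment is abbreviated from that test.

// Stand-in for the ACLE type data512_t; in LLVM IR it lowers to { [8 x i64] }.
typedef struct { unsigned long long val[8]; } data512_t;

void store_example(const data512_t *input, void *addr)
{
    // With +ls64, clang now scalarizes the aggregate operand and passes it
    // by-value, emitting roughly:
    //   call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 %val, i8* %addr)
    // Previously an aggregate operand like this was passed by-reference.
    __asm__ volatile ("st64b %0,[%1]" : : "r" (*input), "r" (addr) : "memory");
}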
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -431,7 +431,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
           Feature == "sve2-aes" || Feature == "sve2-sha3" ||
           Feature == "sve2-sm4" || Feature == "f64mm" || Feature == "f32mm" ||
           Feature == "i8mm" || Feature == "bf16") &&
-         (FPU & SveMode));
+         (FPU & SveMode)) ||
+        (Feature == "ls64" && HasLS64);
 }
 
 bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
@@ -752,6 +753,9 @@ bool AArch64TargetInfo::validateConstraintModifier(
       if (Size == 64)
         return true;
 
+      if (Size == 512)
+        return HasLS64;
+
       SuggestedModifier = "w";
       return false;
     }
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2097,7 +2097,8 @@ CodeGenFunction::EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info,
   } else {
     llvm::Type *Ty = ConvertType(InputType);
     uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty);
-    if (Size <= 64 && llvm::isPowerOf2_64(Size)) {
+    if ((Size <= 64 && llvm::isPowerOf2_64(Size)) ||
+        getTargetHooks().isScalarizableAsmOperand(*this, Ty)) {
       Ty = llvm::IntegerType::get(getLLVMContext(), Size);
       Ty = llvm::PointerType::getUnqual(Ty);
 
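To illustrate the mechanics of the hunk above: for a qualifying aggregate, the operand's address is bitcast to a pointer to an integer of the aggregate's bit size and loaded, so the asm call receives the value directly. A minimal standalone sketch of that cast-and-load using the IRBuilder API (the helper name and parameters are hypothetical, for illustration only):

#include "llvm/IR/IRBuilder.h"

// Hypothetical helper mirroring what EmitAsmInputLValue now does for a
// scalarizable aggregate: reinterpret the operand's address as iN*
// (N = 512 for data512_t) and load, yielding a by-value integer for the
// asm call instead of a by-reference pointer.
llvm::Value *scalarizeAsmOperand(llvm::IRBuilder<> &Builder, llvm::Value *Addr,
                                 uint64_t SizeInBits) {
  llvm::Type *IntTy =
      llvm::IntegerType::get(Builder.getContext(), SizeInBits);
  llvm::Value *Cast =
      Builder.CreateBitCast(Addr, llvm::PointerType::getUnqual(IntTy));
  return Builder.CreateLoad(IntTy, Cast);
}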
@@ -2320,23 +2321,28 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
 
     // If this is a register output, then make the inline asm return it
     // by-value.  If this is a memory result, return the value by-reference.
-    bool isScalarizableAggregate =
-        hasAggregateEvaluationKind(OutExpr->getType());
-    if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) ||
-                                 isScalarizableAggregate)) {
+    QualType QTy = OutExpr->getType();
+    const bool IsScalarOrAggregate = hasScalarEvaluationKind(QTy) ||
+                                     hasAggregateEvaluationKind(QTy);
+    if (!Info.allowsMemory() && IsScalarOrAggregate) {
+
       Constraints += "=" + OutputConstraint;
-      ResultRegQualTys.push_back(OutExpr->getType());
+      ResultRegQualTys.push_back(QTy);
       ResultRegDests.push_back(Dest);
-      ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
-      if (Info.allowsRegister() && isScalarizableAggregate) {
-        ResultTypeRequiresCast.push_back(true);
-        unsigned Size = getContext().getTypeSize(OutExpr->getType());
-        llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size);
-        ResultRegTypes.push_back(ConvTy);
-      } else {
-        ResultTypeRequiresCast.push_back(false);
-        ResultRegTypes.push_back(ResultTruncRegTypes.back());
-      }
+
+      llvm::Type *Ty = ConvertTypeForMem(QTy);
+      const bool RequiresCast = Info.allowsRegister() &&
+          (getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
+           Ty->isAggregateType());
+
+      ResultTruncRegTypes.push_back(Ty);
+      ResultTypeRequiresCast.push_back(RequiresCast);
+
+      if (RequiresCast) {
+        unsigned Size = getContext().getTypeSize(QTy);
+        Ty = llvm::IntegerType::get(getLLVMContext(), Size);
+      }
+      ResultRegTypes.push_back(Ty);
       // If this output is tied to an input, and if the input is larger, then
       // we need to set the actual result type of the inline asm node to be the
       // same as the input type.
@@ -2638,11 +2644,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
   for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
     llvm::Value *Tmp = RegResults[i];
+    llvm::Type *TruncTy = ResultTruncRegTypes[i];
 
     // If the result type of the LLVM IR asm doesn't match the result type of
     // the expression, do the conversion.
     if (ResultRegTypes[i] != ResultTruncRegTypes[i]) {
-      llvm::Type *TruncTy = ResultTruncRegTypes[i];
 
       // Truncate the integer result to the right size, note that TruncTy can be
       // a pointer.
@@ -2672,6 +2678,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
       unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]);
       Address A = Builder.CreateBitCast(Dest.getAddress(*this),
                                         ResultRegTypes[i]->getPointerTo());
+      if (getTargetHooks().isScalarizableAsmOperand(*this, TruncTy)) {
+        Builder.CreateStore(Tmp, A);
+        continue;
+      }
+
       QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false);
       if (Ty.isNull()) {
         const Expr *OutExpr = S.getOutputExpr(i);
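The output side mirrors the input side: when the target hook accepts the operand's type, the i512 returned by the asm is stored straight through the destination address, which the code above has already bitcast to the integer type's pointer. A hypothetical standalone sketch of that store-back (helper name and parameters invented for illustration):

// Hypothetical mirror of the store-back in EmitAsmStmt: Tmp is the iN value
// returned by the asm; DestAddr is the aggregate lvalue's address.
void storeBackAsmResult(llvm::IRBuilder<> &Builder, llvm::Value *Tmp,
                        llvm::Value *DestAddr) {
  llvm::Value *A = Builder.CreateBitCast(
      DestAddr, llvm::PointerType::getUnqual(Tmp->getType()));
  Builder.CreateStore(Tmp, A);
}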
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -5526,6 +5526,20 @@ public:
     Fn->addFnAttr("branch-target-enforcement",
                   BPI.BranchTargetEnforcement ? "true" : "false");
   }
+
+  bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
+                                llvm::Type *Ty) const override {
+    if (CGF.getTarget().hasFeature("ls64")) {
+      auto *ST = dyn_cast<llvm::StructType>(Ty);
+      if (ST && ST->getNumElements() == 1) {
+        auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
+        if (AT && AT->getNumElements() == 8 &&
+            AT->getElementType()->isIntegerTy(64))
+          return true;
+      }
+    }
+    return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
+  }
 };
 
 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
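To make the accepted shape concrete: the override only scalarizes the IR lowering of data512_t, i.e. a struct with exactly one member of type [8 x i64], and only when the ls64 feature is enabled. A small sketch constructing that type with the LLVM API (makeData512Ty is a hypothetical helper, not part of the patch):

#include "llvm/IR/DerivedTypes.h"

// Builds the one type the AArch64 override recognizes: { [8 x i64] }.
// Shapes such as { [4 x i64], [4 x i64] } or { [8 x i32] } fall through to
// the default hook and are still passed by-reference.
llvm::StructType *makeData512Ty(llvm::LLVMContext &Ctx) {
  llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
  return llvm::StructType::get(Ctx, {llvm::ArrayType::get(I64, 8)});
}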
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -148,6 +148,13 @@ public:
     return Ty;
   }
 
+  /// Target hook to decide whether an inline asm operand can be passed
+  /// by value.
+  virtual bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
+                                        llvm::Type *Ty) const {
+    return false;
+  }
+
   /// Adds constraints and types for result registers.
   virtual void addReturnRegisterOutputs(
       CodeGen::CodeGenFunction &CGF, CodeGen::LValue ReturnValue,
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-ls64-inline-asm.c
@@ -0,0 +1,84 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +ls64 -O1 -S -emit-llvm -x c %s -o - | FileCheck %s
+
+struct foo { unsigned long long x[8]; };
+
+// CHECK-LABEL: @load(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(i8* [[ADDR:%.*]]) #[[ATTR1:[0-9]+]], !srcloc !6
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast %struct.foo* [[OUTPUT:%.*]] to i512*
+// CHECK-NEXT:    store i512 [[TMP0]], i512* [[TMP1]], align 8
+// CHECK-NEXT:    ret void
+//
+void load(struct foo *output, void *addr)
+{
+    __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");
+}
+
+// CHECK-LABEL: @store(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.foo* [[INPUT:%.*]] to i512*
+// CHECK-NEXT:    [[TMP1:%.*]] = load i512, i512* [[TMP0]], align 8
+// CHECK-NEXT:    call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP1]], i8* [[ADDR:%.*]]) #[[ATTR1]], !srcloc !7
+// CHECK-NEXT:    ret void
+//
+void store(const struct foo *input, void *addr)
+{
+    __asm__ volatile ("st64b %0,[%1]" : : "r" (*input), "r" (addr) : "memory" );
+}
+
+// CHECK-LABEL: @store2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[IN:%.*]], align 4, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 1
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CONV5:%.*]] = sext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CONV8:%.*]] = sext i32 [[TMP3]] to i64
+// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 25
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CONV11:%.*]] = sext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 36
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CONV14:%.*]] = sext i32 [[TMP5]] to i64
+// CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 49
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CONV17:%.*]] = sext i32 [[TMP6]] to i64
+// CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 64
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX19]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CONV20:%.*]] = sext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[S_SROA_10_0_INSERT_EXT:%.*]] = zext i64 [[CONV20]] to i512
+// CHECK-NEXT:    [[S_SROA_10_0_INSERT_SHIFT:%.*]] = shl nuw i512 [[S_SROA_10_0_INSERT_EXT]], 448
+// CHECK-NEXT:    [[S_SROA_9_0_INSERT_EXT:%.*]] = zext i64 [[CONV17]] to i512
+// CHECK-NEXT:    [[S_SROA_9_0_INSERT_SHIFT:%.*]] = shl nuw nsw i512 [[S_SROA_9_0_INSERT_EXT]], 384
+// CHECK-NEXT:    [[S_SROA_9_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_10_0_INSERT_SHIFT]], [[S_SROA_9_0_INSERT_SHIFT]]
+// CHECK-NEXT:    [[S_SROA_8_0_INSERT_EXT:%.*]] = zext i64 [[CONV14]] to i512
+// CHECK-NEXT:    [[S_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw nsw i512 [[S_SROA_8_0_INSERT_EXT]], 320
+// CHECK-NEXT:    [[S_SROA_8_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_9_0_INSERT_INSERT]], [[S_SROA_8_0_INSERT_SHIFT]]
+// CHECK-NEXT:    [[S_SROA_7_0_INSERT_EXT:%.*]] = zext i64 [[CONV11]] to i512
+// CHECK-NEXT:    [[S_SROA_7_0_INSERT_SHIFT:%.*]] = shl nuw nsw i512 [[S_SROA_7_0_INSERT_EXT]], 256
+// CHECK-NEXT:    [[S_SROA_7_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_8_0_INSERT_INSERT]], [[S_SROA_7_0_INSERT_SHIFT]]
+// CHECK-NEXT:    [[S_SROA_6_0_INSERT_EXT:%.*]] = zext i64 [[CONV8]] to i512
+// CHECK-NEXT:    [[S_SROA_6_0_INSERT_SHIFT:%.*]] = shl nuw nsw i512 [[S_SROA_6_0_INSERT_EXT]], 192
+// CHECK-NEXT:    [[S_SROA_6_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_7_0_INSERT_INSERT]], [[S_SROA_6_0_INSERT_SHIFT]]
+// CHECK-NEXT:    [[S_SROA_5_0_INSERT_EXT:%.*]] = zext i64 [[CONV5]] to i512
+// CHECK-NEXT:    [[S_SROA_5_0_INSERT_SHIFT:%.*]] = shl nuw nsw i512 [[S_SROA_5_0_INSERT_EXT]], 128
+// CHECK-NEXT:    [[S_SROA_4_0_INSERT_EXT:%.*]] = zext i64 [[CONV2]] to i512
+// CHECK-NEXT:    [[S_SROA_4_0_INSERT_SHIFT:%.*]] = shl nuw nsw i512 [[S_SROA_4_0_INSERT_EXT]], 64
+// CHECK-NEXT:    [[S_SROA_4_0_INSERT_MASK:%.*]] = or i512 [[S_SROA_6_0_INSERT_INSERT]], [[S_SROA_5_0_INSERT_SHIFT]]
+// CHECK-NEXT:    [[S_SROA_0_0_INSERT_EXT:%.*]] = zext i64 [[CONV]] to i512
+// CHECK-NEXT:    [[S_SROA_0_0_INSERT_MASK:%.*]] = or i512 [[S_SROA_4_0_INSERT_MASK]], [[S_SROA_4_0_INSERT_SHIFT]]
+// CHECK-NEXT:    [[S_SROA_0_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_0_0_INSERT_MASK]], [[S_SROA_0_0_INSERT_EXT]]
+// CHECK-NEXT:    call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], i8* [[ADDR:%.*]]) #[[ATTR1]], !srcloc !12
+// CHECK-NEXT:    ret void
+//
+void store2(int *in, void *addr)
+{
+    struct foo s = { in[0], in[1], in[4], in[16], in[25], in[36], in[49], in[64] };
+    __asm__ volatile ("st64b %0,[%1]" : : "r" (s), "r" (addr) : "memory" );
+}