Handle "homogeneous aggregates" as required by the ARM AAPCS-VFP ABI.

A homogeneous aggregate is an aggregate data structure where after flattening any nesting there are 1 to 4 elements of the same base type that is either a float, double, or Neon vector. All Neon vectors of the same size, either 64 or 128 bits, are treated as equivalent for this purpose. When using the AAPCS-VFP ABI, check for homogeneous aggregates and pass them as arguments by expanding them into a sequence of their base types. This requires extending the existing support for expanded arguments to handle not only structs, but also constant arrays and complex types. llvm-svn: 136767
2011-08-03 05:58:22 +00:00 · 2011-08-03 05:58:22 +00:00 · e826a2a56b
parent 9bfd9144e6
commit e826a2a56b
3 changed files with 238 additions and 50 deletions
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@ -313,49 +313,64 @@ CGFunctionInfo::CGFunctionInfo(unsigned _CallingConvention,

 void CodeGenTypes::GetExpandedTypes(QualType type,
                     SmallVectorImpl<llvm::Type*> &expandedTypes) {
-  const RecordType *RT = type->getAsStructureType();
-  assert(RT && "Can only expand structure types.");
-  const RecordDecl *RD = RT->getDecl();
-  assert(!RD->hasFlexibleArrayMember() &&
-         "Cannot expand structure with flexible array.");
-
-  for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(type)) {
+    uint64_t NumElts = AT->getSize().getZExtValue();
+    for (uint64_t Elt = 0; Elt < NumElts; ++Elt)
+      GetExpandedTypes(AT->getElementType(), expandedTypes);
+  } else if (const RecordType *RT = type->getAsStructureType()) {
+    const RecordDecl *RD = RT->getDecl();
+    assert(!RD->hasFlexibleArrayMember() &&
+           "Cannot expand structure with flexible array.");
+    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
         i != e; ++i) {
-    const FieldDecl *FD = *i;
-    assert(!FD->isBitField() &&
-           "Cannot expand structure with bit-field members.");
-
-    QualType fieldType = FD->getType();
-    if (fieldType->isRecordType())
-      GetExpandedTypes(fieldType, expandedTypes);
-    else
-      expandedTypes.push_back(ConvertType(fieldType));
-  }
+      const FieldDecl *FD = *i;
+      assert(!FD->isBitField() &&
+             "Cannot expand structure with bit-field members.");
+      GetExpandedTypes(FD->getType(), expandedTypes);
+    }
+  } else if (const ComplexType *CT = type->getAs<ComplexType>()) {
+    llvm::Type *EltTy = ConvertType(CT->getElementType());
+    expandedTypes.push_back(EltTy);
+    expandedTypes.push_back(EltTy);
+  } else
+    expandedTypes.push_back(ConvertType(type));
 }

 llvm::Function::arg_iterator
 CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV,
                                    llvm::Function::arg_iterator AI) {
-  const RecordType *RT = Ty->getAsStructureType();
-  assert(RT && "Can only expand structure types.");
-
-  RecordDecl *RD = RT->getDecl();
  assert(LV.isSimple() &&
         "Unexpected non-simple lvalue during struct expansion.");
  llvm::Value *Addr = LV.getAddress();
-  for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-         i != e; ++i) {
-    FieldDecl *FD = *i;
-    QualType FT = FD->getType();

-    // FIXME: What are the right qualifiers here?
-    LValue LV = EmitLValueForField(Addr, FD, 0);
-    if (CodeGenFunction::hasAggregateLLVMType(FT)) {
-      AI = ExpandTypeFromArgs(FT, LV, AI);
-    } else {
-      EmitStoreThroughLValue(RValue::get(AI), LV);
-      ++AI;
+  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+    unsigned NumElts = AT->getSize().getZExtValue();
+    QualType EltTy = AT->getElementType();
+    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
+      llvm::Value *EltAddr = Builder.CreateConstGEP2_32(Addr, 0, Elt);
+      LValue LV = MakeAddrLValue(EltAddr, EltTy);
+      AI = ExpandTypeFromArgs(EltTy, LV, AI);
    }
+  } else if (const RecordType *RT = Ty->getAsStructureType()) {
+    RecordDecl *RD = RT->getDecl();
+    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+         i != e; ++i) {
+      FieldDecl *FD = *i;
+      QualType FT = FD->getType();
+
+      // FIXME: What are the right qualifiers here?
+      LValue LV = EmitLValueForField(Addr, FD, 0);
+      AI = ExpandTypeFromArgs(FT, LV, AI);
+    }
+  } else if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
+    QualType EltTy = CT->getElementType();
+    llvm::Value *RealAddr = Builder.CreateStructGEP(Addr, 0, "real");
+    EmitStoreThroughLValue(RValue::get(AI++), MakeAddrLValue(RealAddr, EltTy));
+    llvm::Value *ImagAddr = Builder.CreateStructGEP(Addr, 0, "imag");
+    EmitStoreThroughLValue(RValue::get(AI++), MakeAddrLValue(ImagAddr, EltTy));
+  } else {
+    EmitStoreThroughLValue(RValue::get(AI), LV);
+    ++AI;
  }

  return AI;
@ -1462,26 +1477,43 @@ static void checkArgMatches(llvm::Value *Elt, unsigned &ArgNo,
 void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV,
                                       SmallVector<llvm::Value*,16> &Args,
                                       llvm::FunctionType *IRFuncTy) {
-  const RecordType *RT = Ty->getAsStructureType();
-  assert(RT && "Can only expand structure types.");
-  
-  RecordDecl *RD = RT->getDecl();
-  assert(RV.isAggregate() && "Unexpected rvalue during struct expansion");
-  llvm::Value *Addr = RV.getAggregateAddr();
-  for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-       i != e; ++i) {
-    FieldDecl *FD = *i;
-    QualType FT = FD->getType();
-    
-    // FIXME: What are the right qualifiers here?
-    LValue LV = EmitLValueForField(Addr, FD, 0);
-    if (CodeGenFunction::hasAggregateLLVMType(FT)) {
-      ExpandTypeToArgs(FT, RValue::getAggregate(LV.getAddress()),
-                       Args, IRFuncTy);
-      continue;
+  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+    unsigned NumElts = AT->getSize().getZExtValue();
+    QualType EltTy = AT->getElementType();
+    llvm::Value *Addr = RV.getAggregateAddr();
+    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
+      llvm::Value *EltAddr = Builder.CreateConstGEP2_32(Addr, 0, Elt);
+      LValue LV = MakeAddrLValue(EltAddr, EltTy);
+      RValue EltRV;
+      if (CodeGenFunction::hasAggregateLLVMType(EltTy))
+        EltRV = RValue::getAggregate(LV.getAddress());
+      else
+        EltRV = EmitLoadOfLValue(LV);
+      ExpandTypeToArgs(EltTy, EltRV, Args, IRFuncTy);
    }
+  } else if (const RecordType *RT = Ty->getAsStructureType()) {
+    RecordDecl *RD = RT->getDecl();
+    assert(RV.isAggregate() && "Unexpected rvalue during struct expansion");
+    llvm::Value *Addr = RV.getAggregateAddr();
+    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+         i != e; ++i) {
+      FieldDecl *FD = *i;
+      QualType FT = FD->getType();
    
-    RValue RV = EmitLoadOfLValue(LV);
+      // FIXME: What are the right qualifiers here?
+      LValue LV = EmitLValueForField(Addr, FD, 0);
+      RValue FldRV;
+      if (CodeGenFunction::hasAggregateLLVMType(FT))
+        FldRV = RValue::getAggregate(LV.getAddress());
+      else
+        FldRV = EmitLoadOfLValue(LV);
+      ExpandTypeToArgs(FT, FldRV, Args, IRFuncTy);
+    }
+  } else if (isa<ComplexType>(Ty)) {
+    ComplexPairTy CV = RV.getComplexVal();
+    Args.push_back(CV.first);
+    Args.push_back(CV.second);
+  } else {
    assert(RV.isScalar() &&
           "Unexpected non-scalar rvalue during struct expansion.");

--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@ -2379,6 +2379,73 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
  }
 }

+/// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous
+/// aggregate.  If HAMembers is non-null, the number of base elements
+/// contained in the type is returned through it; this is used for the
+/// recursive calls that check aggregate component types.
+static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                   ASTContext &Context,
+                                   uint64_t *HAMembers = 0) {
+  uint64_t Members;
+  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
+    if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members))
+      return false;
+    Members *= AT->getSize().getZExtValue();
+  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->isUnion() || RD->hasFlexibleArrayMember())
+      return false;
+    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+      if (!CXXRD->isAggregate())
+        return false;
+    }
+    Members = 0;
+    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+         i != e; ++i) {
+      const FieldDecl *FD = *i;
+      uint64_t FldMembers;
+      if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers))
+        return false;
+      Members += FldMembers;
+    }
+  } else {
+    Members = 1;
+    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
+      Members = 2;
+      Ty = CT->getElementType();
+    }
+
+    // Homogeneous aggregates for AAPCS-VFP must have base types of float,
+    // double, or 64-bit or 128-bit vectors.
+    if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+      if (BT->getKind() != BuiltinType::Float && 
+          BT->getKind() != BuiltinType::Double)
+        return false;
+    } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+      unsigned VecSize = Context.getTypeSize(VT);
+      if (VecSize != 64 && VecSize != 128)
+        return false;
+    } else {
+      return false;
+    }
+
+    // The base type must be the same for all members.  Vector types of the
+    // same total size are treated as being equivalent here.
+    const Type *TyPtr = Ty.getTypePtr();
+    if (!Base)
+      Base = TyPtr;
+    if (Base != TyPtr &&
+        (!Base->isVectorType() || !TyPtr->isVectorType() ||
+         Context.getTypeSize(Base) != Context.getTypeSize(TyPtr)))
+      return false;
+  }
+
+  // Homogeneous Aggregates can have at most 4 members of the base type.
+  if (HAMembers)
+    *HAMembers = Members;
+  return (Members <= 4);
+}
+
 ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
  if (!isAggregateTypeForABI(Ty)) {
    // Treat an enum type as its underlying type.
@ -2398,6 +2465,13 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
  if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty))
    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);

+  if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
+    // Homogeneous Aggregates need to be expanded.
+    const Type *Base = 0;
+    if (isHomogeneousAggregate(Ty, Base, getContext()))
+      return ABIArgInfo::getExpand();
+  }
+
  // Otherwise, pass by coercing to a structure of the appropriate size.
  //
  // FIXME: This is kind of nasty... but there isn't much choice because the ARM
--- a/clang/test/CodeGen/arm-aapcs-vfp.c
+++ b/clang/test/CodeGen/arm-aapcs-vfp.c
@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -triple thumbv7-apple-darwin9 \
+// RUN:   -target-abi aapcs \
+// RUN:   -target-cpu cortex-a8 \
+// RUN:   -mfloat-abi hard \
+// RUN:   -ffreestanding \
+// RUN:   -emit-llvm -w -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+struct homogeneous_struct {
+  float f[2];
+  float f3;
+  float f4;
+};
+// CHECK: define arm_aapcs_vfpcc void @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+extern void struct_callee(struct homogeneous_struct);
+void test_struct(struct homogeneous_struct arg) {
+  struct_callee(arg);
+}
+
+struct nested_array {
+  double d[4];
+};
+// CHECK: define arm_aapcs_vfpcc void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}})
+extern void array_callee(struct nested_array);
+void test_array(struct nested_array arg) {
+  array_callee(arg);
+}
+
+extern void complex_callee(__complex__ double);
+// CHECK: define arm_aapcs_vfpcc void @test_complex(double %{{.*}}, double %{{.*}})
+void test_complex(__complex__ double cd) {
+  complex_callee(cd);
+}
+
+// Structs with more than 4 elements of the base type are not treated
+// as homogeneous aggregates.  Test that.
+
+struct big_struct {
+  float f1;
+  float f[2];
+  float f3;
+  float f4;
+};
+// CHECK: define arm_aapcs_vfpcc void @test_big([5 x i32] %{{.*}})
+extern void big_callee(struct big_struct);
+void test_big(struct big_struct arg) {
+  big_callee(arg);
+}
+
+// Make sure that aggregates with multiple base types are not treated as
+// homogeneous aggregates.
+
+struct heterogeneous_struct {
+  float f1;
+  int i2;
+};
+// CHECK: define arm_aapcs_vfpcc void @test_hetero([2 x i32] %{{.*}})
+extern void hetero_callee(struct heterogeneous_struct);
+void test_hetero(struct heterogeneous_struct arg) {
+  hetero_callee(arg);
+}
+
+// Neon multi-vector types are homogeneous aggregates.
+// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+int8x16_t f0(int8x16x4_t v4) {
+  return vaddq_s8(v4.val[0], v4.val[3]);
+}
+
+// ...and it doesn't matter whether the vectors are exactly the same, as long
+// as they have the same size.
+
+struct neon_struct {
+  int8x8x2_t v12;
+  int32x2_t v3;
+  int16x4_t v4;
+};
+// CHECK: define arm_aapcs_vfpcc void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}})
+extern void neon_callee(struct neon_struct);
+void test_neon(struct neon_struct arg) {
+  neon_callee(arg);
+}