llvm-project/clang/test/CodeGen/align_value.cpp

// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s

// Pointer-to-double typedef carrying align_value(64). The attribute states an
// alignment assumption about the address held in the pointer; the functions
// below use this typedef to exercise how that assumption is lowered to IR.
typedef double * __attribute__((align_value(64))) aligned_double;

// Function parameters that carry align_value -- via the typedef (x), or
// written directly on a pointer (y) or reference (z) declarator -- are
// expected to receive a matching `align N` parameter attribute in the emitted
// function signature (64, 32 and 128 respectively).
void foo(aligned_double x, double * y __attribute__((align_value(32))),
         double & z __attribute__((align_value(128)))) { };
// CHECK: define void @_Z3fooPdS_Rd(double* align 64 %x, double* align 32 %y, double* dereferenceable(8) align 128 %z)

// Aggregate whose only member is an align_value(64) pointer; used by the
// member-access tests that follow.
struct ad_struct {
  // Pointer member with the 64-byte alignment assumption from the typedef.
  aligned_double a;
};

// Reads the attributed member through a reference to the struct (x.a).
// The expectations inside the body match the alignment-assumption sequence:
// a ptrtoint, an `and` with 63 (64 - 1), an `icmp eq` against 0, and a call
// to @llvm.assume on the comparison result.
double *foo(ad_struct& x) {
// CHECK-LABEL: @_Z3fooR9ad_struct

// CHECK: [[PTRINT1:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR1:%.+]] = and i64 [[PTRINT1]], 63
// CHECK: [[MASKCOND1:%.+]] = icmp eq i64 [[MASKEDPTR1]], 0
// CHECK: call void @llvm.assume(i1 [[MASKCOND1]])
  return x.a;
}

// Same as the reference case, but the attributed member is reached through a
// pointer to the struct (x->a). The same ptrtoint / and 63 / icmp eq 0 /
// @llvm.assume sequence is expected.
double *goo(ad_struct *x) {
// CHECK-LABEL: @_Z3gooP9ad_struct

// CHECK: [[PTRINT2:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR2:%.+]] = and i64 [[PTRINT2]], 63
// CHECK: [[MASKCOND2:%.+]] = icmp eq i64 [[MASKEDPTR2]], 0
// CHECK: call void @llvm.assume(i1 [[MASKCOND2]])
  return x->a;
}

// x points at an aligned_double; dereferencing it (*x) yields the attributed
// pointer value, so the alignment-assumption sequence (mask 63, compare with
// 0, @llvm.assume) is expected.
double *bar(aligned_double *x) {
// CHECK-LABEL: @_Z3barPPd

// CHECK: [[PTRINT3:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR3:%.+]] = and i64 [[PTRINT3]], 63
// CHECK: [[MASKCOND3:%.+]] = icmp eq i64 [[MASKEDPTR3]], 0
// CHECK: call void @llvm.assume(i1 [[MASKCOND3]])
  return *x;
}

// x is a reference to an aligned_double and the function returns the pointer
// by value, so the attributed pointer is read through the reference. The
// alignment-assumption sequence ending in @llvm.assume is expected.
double *car(aligned_double &x) {
// CHECK-LABEL: @_Z3carRPd

// CHECK: [[PTRINT4:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR4:%.+]] = and i64 [[PTRINT4]], 63
// CHECK: [[MASKCOND4:%.+]] = icmp eq i64 [[MASKEDPTR4]], 0
// CHECK: call void @llvm.assume(i1 [[MASKCOND4]])
  return x;
}

// Array-subscript variant: x[5] selects an aligned_double element from the
// array x points into, and that element's pointer value is returned. The
// alignment-assumption sequence ending in @llvm.assume is expected.
double *dar(aligned_double *x) {
// CHECK-LABEL: @_Z3darPPd

// CHECK: [[PTRINT5:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR5:%.+]] = and i64 [[PTRINT5]], 63
// CHECK: [[MASKCOND5:%.+]] = icmp eq i64 [[MASKEDPTR5]], 0
// CHECK: call void @llvm.assume(i1 [[MASKCOND5]])
  return x[5];
}

// Declared only (no body in this file); its return type carries
// align_value(64) through the typedef.
aligned_double eep();
// Call-result variant: the pointer comes from a call to a function whose
// return type is aligned_double. The alignment-assumption sequence ending in
// @llvm.assume is expected here as well.
double *ret() {
// CHECK-LABEL: @_Z3retv

// CHECK: [[PTRINT6:%.+]] = ptrtoint
// CHECK: [[MASKEDPTR6:%.+]] = and i64 [[PTRINT6]], 63
// CHECK: [[MASKCOND6:%.+]] = icmp eq i64 [[MASKEDPTR6]], 0
// CHECK: call void @llvm.assume(i1 [[MASKCOND6]])
  return eep();
}

// Negative case: returns the pointer-to-aligned_double itself without reading
// the attributed pointer it points at, so no @llvm.assume call is expected in
// this function.
double **no1(aligned_double *x) {
// CHECK-LABEL: @_Z3no1PPd
  return x;
// CHECK-NOT: call void @llvm.assume
}

// Negative case: the reference is returned as a reference (return type is
// double *&), so the attributed pointer value is not read here and no
// @llvm.assume call is expected.
double *&no2(aligned_double &x) {
// CHECK-LABEL: @_Z3no2RPd
  return x;
// CHECK-NOT: call void @llvm.assume
}

// Negative case: only the address of the referenced aligned_double is taken
// (&x); the pointer value itself is not read, so no @llvm.assume call is
// expected.
double **no3(aligned_double &x) {
// CHECK-LABEL: @_Z3no3RPd
  return &x;
// CHECK-NOT: call void @llvm.assume
}

// Negative case (overload of no3 taking the pointer by value): x is a function
// parameter that is dereferenced to read the double. No @llvm.assume call is
// expected in the body.
// NOTE(review): presumably because by-value parameters are covered by the
// `align` parameter attribute instead -- confirm against the CodeGen change.
double no3(aligned_double x) {
// CHECK-LABEL: @_Z3no3Pd
  return *x;
// CHECK-NOT: call void @llvm.assume
}

// Negative case: the by-value aligned_double parameter is returned directly.
// No @llvm.assume call is expected in the body.
// NOTE(review): presumably because by-value parameters are covered by the
// `align` parameter attribute instead -- confirm against the CodeGen change.
double *no4(aligned_double x) {
// CHECK-LABEL: @_Z3no4Pd
  return x;
// CHECK-NOT: call void @llvm.assume
}
Initial support for the align_value attribute This adds support for the align_value attribute. This attribute is supported by Intel's compiler (versions 14.0+), and several of my HPC users have requested support in Clang. It specifies an alignment assumption on the values to which a pointer points, and is used by numerical libraries to encourage efficient generation of vector code. Of course, we already have an aligned attribute that can specify enhanced alignment for a type, so why is this additional attribute important? The problem is that if you want to specify that an input array of T is, say, 64-byte aligned, you could try this: typedef double aligned_double __attribute__((aligned(64))); void foo(aligned_double *P) { double x = P[0]; // This is fine. double y = P[1]; // What alignment did those doubles have again? } the access here to P[1] causes problems. P was specified as a pointer to type aligned_double, and any object of type aligned_double must be 64-byte aligned. But if P[0] is 64-byte aligned, then P[1] cannot be, and this access causes undefined behavior. Getting round this problem requires a lot of awkward casting and hand-unrolling of loops, all of which is bad. With the align_value attribute, we can accomplish what we'd like in a well defined way: typedef double *aligned_double_ptr __attribute__((align_value(64))); void foo(aligned_double_ptr P) { double x = P[0]; // This is fine. double y = P[1]; // This is fine too. } This attribute does not create a new type (and so is not part of the type system), and so will only "propagate" through templates, auto, etc. by optimizer deduction after inlining. This seems consistent with Intel's implementation (thanks to Alexey for confirming the various Intel-compiler behaviors). As a final note, I would have chosen to call this aligned_value, not align_value, for better naming consistency with the aligned attribute, but I think it would be more useful to users to adopt Intel's name. 
llvm-svn: 218910 2014-10-03 05:21:25 +08:00			`// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s \| FileCheck %s`

			`typedef double * __attribute__((align_value(64))) aligned_double;`

			`void foo(aligned_double x, double * y __attribute__((align_value(32))),`
			`double & z __attribute__((align_value(128)))) { };`
			`// CHECK: define void @_Z3fooPdS_Rd(double* align 64 %x, double* align 32 %y, double* dereferenceable(8) align 128 %z)`

Emit @llvm.assume for non-parameter lvalue align_value-attribute loads We already add the align parameter attribute for function parameters that have the align_value attribute (or those with a typedef type having that attribute), which is an important special case, but does not handle pointers with value alignment assumptions that come into scope in any other way. To handle the general case, emit an @llvm.assume-based alignment assumption whenever we load the pointer-typed lvalue of an align_value-attributed variable (except for function parameters, which we already deal with at entry). I'll also note that this is more general than Intel's described support in: https://software.intel.com/en-us/articles/data-alignment-to-assist-vectorization which states that the compiler inserts __assume_aligned directives in response to align_value-attributed variables only for function parameters and for the initializers of local variables. I think that we can make the optimizer deal with this more-general scheme (which could lead to a lot of calls to @llvm.assume inside of loop bodies, for example), but if not, I'll rework this to be less aggressive. llvm-svn: 219052 2014-10-04 23:26:49 +08:00			`struct ad_struct {`
			`aligned_double a;`
			`};`

			`double *foo(ad_struct& x) {`
			`// CHECK-LABEL: @_Z3fooR9ad_struct`

			`// CHECK: [[PTRINT1:%.+]] = ptrtoint`
			`// CHECK: [[MASKEDPTR1:%.+]] = and i64 [[PTRINT1]], 63`
			`// CHECK: [[MASKCOND1:%.+]] = icmp eq i64 [[MASKEDPTR1]], 0`
			`// CHECK: call void @llvm.assume(i1 [[MASKCOND1]])`
			`return x.a;`
			`}`

			`double *goo(ad_struct *x) {`
			`// CHECK-LABEL: @_Z3gooP9ad_struct`

			`// CHECK: [[PTRINT2:%.+]] = ptrtoint`
			`// CHECK: [[MASKEDPTR2:%.+]] = and i64 [[PTRINT2]], 63`
			`// CHECK: [[MASKCOND2:%.+]] = icmp eq i64 [[MASKEDPTR2]], 0`
			`// CHECK: call void @llvm.assume(i1 [[MASKCOND2]])`
			`return x->a;`
			`}`

			`double *bar(aligned_double *x) {`
			`// CHECK-LABEL: @_Z3barPPd`

			`// CHECK: [[PTRINT3:%.+]] = ptrtoint`
			`// CHECK: [[MASKEDPTR3:%.+]] = and i64 [[PTRINT3]], 63`
			`// CHECK: [[MASKCOND3:%.+]] = icmp eq i64 [[MASKEDPTR3]], 0`
			`// CHECK: call void @llvm.assume(i1 [[MASKCOND3]])`
			`return *x;`
			`}`

			`double *car(aligned_double &x) {`
			`// CHECK-LABEL: @_Z3carRPd`

			`// CHECK: [[PTRINT4:%.+]] = ptrtoint`
			`// CHECK: [[MASKEDPTR4:%.+]] = and i64 [[PTRINT4]], 63`
			`// CHECK: [[MASKCOND4:%.+]] = icmp eq i64 [[MASKEDPTR4]], 0`
			`// CHECK: call void @llvm.assume(i1 [[MASKCOND4]])`
			`return x;`
			`}`

			`double *dar(aligned_double *x) {`
			`// CHECK-LABEL: @_Z3darPPd`

			`// CHECK: [[PTRINT5:%.+]] = ptrtoint`
			`// CHECK: [[MASKEDPTR5:%.+]] = and i64 [[PTRINT5]], 63`
			`// CHECK: [[MASKCOND5:%.+]] = icmp eq i64 [[MASKEDPTR5]], 0`
			`// CHECK: call void @llvm.assume(i1 [[MASKCOND5]])`
			`return x[5];`
			`}`

			`aligned_double eep();`
			`double *ret() {`
			`// CHECK-LABEL: @_Z3retv`

			`// CHECK: [[PTRINT6:%.+]] = ptrtoint`
			`// CHECK: [[MASKEDPTR6:%.+]] = and i64 [[PTRINT6]], 63`
			`// CHECK: [[MASKCOND6:%.+]] = icmp eq i64 [[MASKEDPTR6]], 0`
			`// CHECK: call void @llvm.assume(i1 [[MASKCOND6]])`
			`return eep();`
			`}`

			`double **no1(aligned_double *x) {`
			`// CHECK-LABEL: @_Z3no1PPd`
			`return x;`
			`// CHECK-NOT: call void @llvm.assume`
			`}`

			`double *&no2(aligned_double &x) {`
			`// CHECK-LABEL: @_Z3no2RPd`
			`return x;`
			`// CHECK-NOT: call void @llvm.assume`
			`}`

			`double **no3(aligned_double &x) {`
			`// CHECK-LABEL: @_Z3no3RPd`
			`return &x;`
			`// CHECK-NOT: call void @llvm.assume`
			`}`

			`double no3(aligned_double x) {`
			`// CHECK-LABEL: @_Z3no3Pd`
			`return *x;`
			`// CHECK-NOT: call void @llvm.assume`
			`}`

			`double *no4(aligned_double x) {`
			`// CHECK-LABEL: @_Z3no4Pd`
			`return x;`
			`// CHECK-NOT: call void @llvm.assume`
			`}`