llvm-project/clang/test/OpenMP/simd_metadata.c

// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX

void h1(float *c, float *a, double b[], int size)
{
// CHECK-LABEL: define void @h1
  int t = 0;
#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b)
// CHECK:         [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT:    [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT:    [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK:         [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT:    [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK:         [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT:    [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
  }
// do not emit parallel_loop_access metadata due to usage of safelen clause.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8)
// CHECK:         [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT:    [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT:    [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK:         [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT:    [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK:         [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT:    [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
  }
// do not emit parallel_loop_access metadata due to usage of safelen clause.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
#pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8)
// CHECK:         [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT:    [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT:    [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK:         [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT:    [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK:         [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT:    [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT:    call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
  }
}

void h2(float *c, float *a, float *b, int size)
{
// CHECK-LABEL: define void @h2
  int t = 0;
#pragma omp simd linear(t)
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access [[LOOP_H2_HEADER:![0-9]+]]
  }
}

void h3(float *c, float *a, float *b, int size)
{
// CHECK-LABEL: define void @h3
#pragma omp simd
  for (int i = 0; i < size; ++i) {
    for (int j = 0; j < size; ++j) {
      c[j*i] = a[i] * b[j];
    }
  }
// do not emit parallel_loop_access for nested loop.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
}

// Metadata for h1:
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_16:![0-9]+]], [[LOOP_VEC_ENABLE:![0-9]+]]}
// CHECK: [[LOOP_WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16}
// CHECK: [[LOOP_VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8:![0-9]+]], [[LOOP_VEC_ENABLE]]}
// CHECK: [[LOOP_WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8]], [[LOOP_VEC_ENABLE]]}
//
// Metadata for h2:
// CHECK: [[LOOP_H2_HEADER]] = distinct !{[[LOOP_H2_HEADER]], [[LOOP_VEC_ENABLE]]}
//
// Metadata for h3:
// CHECK: [[LOOP_H3_HEADER:![0-9]+]] = distinct !{[[LOOP_H3_HEADER]], [[LOOP_VEC_ENABLE]]}
//
[OpenMP] Test AVX default SIMD alignment. NFC. llvm-svn: 237991 2015-05-22 10:51:49 +08:00			`// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=X86`
			`// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX`
[CodeGen] Teach X86_64ABIInfo about AVX512. As specified in the SysV AVX512 ABI drafts. It follows the same scheme as AVX2: Arguments of type __m512 are split into eight eightbyte chunks. The least significant one belongs to class SSE and all the others to class SSEUP. This also means we change the OpenMP SIMD default alignment on AVX512. Based on r240337. Differential Revision: http://reviews.llvm.org/D9894 llvm-svn: 240338 2015-06-23 05:31:43 +08:00			`// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512`
[X86] Use AVX features instead of ABI to init. SimdDefaultAlign. The ABI string only exists to communicate with TargetCodeGenInfo. Concretely, since we only used "avx*" ABI strings on x86_64 (as AVX doesn't affect the i386 ABIs), this meant that, when initializing SimdDefaultAlign, we would ignore AVX/AVX512 on i386, for no good reason. Instead, directly check the features. A similar change for MaxVectorAlign will follow. Differential Revision: http://reviews.llvm.org/D12390 llvm-svn: 246228 2015-08-28 06:24:56 +08:00			`// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=X86`
			`// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX`
			`// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512`
[OpenMP] Test AVX default SIMD alignment. NFC. llvm-svn: 237991 2015-05-22 10:51:49 +08:00			`// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=PPC`
			`// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - \| FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00
[OPENMP] Codegen of the ‘aligned’ clause for the ‘omp simd’ directive. Differential Revision: http://reviews.llvm.org/D5499 llvm-svn: 218660 2014-09-30 13:29:28 +08:00			`void h1(float c, float a, double b[], int size)`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00			`{`
			`// CHECK-LABEL: define void @h1`
			`int t = 0;`
[OPENMP] Codegen of the ‘aligned’ clause for the ‘omp simd’ directive. Differential Revision: http://reviews.llvm.org/D5499 llvm-svn: 218660 2014-09-30 13:29:28 +08:00			`#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b)`
			`// CHECK: [[C_PTRINT:%.+]] = ptrtoint`
			`// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31`
			`// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])`
			`// CHECK: [[A_PTRINT:%.+]] = ptrtoint`
[OpenMP] Test AVX default SIMD alignment. NFC. llvm-svn: 237991 2015-05-22 10:51:49 +08:00
			`// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`
			`// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31`
[CodeGen] Teach X86_64ABIInfo about AVX512. As specified in the SysV AVX512 ABI drafts. It follows the same scheme as AVX2: Arguments of type __m512 are split into eight eightbyte chunks. The least significant one belongs to class SSE and all the others to class SSEUP. This also means we change the OpenMP SIMD default alignment on AVX512. Based on r240337. Differential Revision: http://reviews.llvm.org/D9894 llvm-svn: 240338 2015-06-23 05:31:43 +08:00			`// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63`
[OpenMP] Test AVX default SIMD alignment. NFC. llvm-svn: 237991 2015-05-22 10:51:49 +08:00			`// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`
			`// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`

[OPENMP] Codegen of the ‘aligned’ clause for the ‘omp simd’ directive. Differential Revision: http://reviews.llvm.org/D5499 llvm-svn: 218660 2014-09-30 13:29:28 +08:00			`// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])`
			`// CHECK: [[B_PTRINT:%.+]] = ptrtoint`
[OpenMP] Test AVX default SIMD alignment. NFC. llvm-svn: 237991 2015-05-22 10:51:49 +08:00
			`// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15`
			`// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31`
[CodeGen] Teach X86_64ABIInfo about AVX512. As specified in the SysV AVX512 ABI drafts. It follows the same scheme as AVX2: Arguments of type __m512 are split into eight eightbyte chunks. The least significant one belongs to class SSE and all the others to class SSEUP. This also means we change the OpenMP SIMD default alignment on AVX512. Based on r240337. Differential Revision: http://reviews.llvm.org/D9894 llvm-svn: 240338 2015-06-23 05:31:43 +08:00			`// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63`
[OpenMP] Test AVX default SIMD alignment. NFC. llvm-svn: 237991 2015-05-22 10:51:49 +08:00			`// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15`
			`// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31`

[OPENMP] Codegen of the ‘aligned’ clause for the ‘omp simd’ directive. Differential Revision: http://reviews.llvm.org/D5499 llvm-svn: 218660 2014-09-30 13:29:28 +08:00			`// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00			`for (int i = 0; i < size; ++i) {`
			`c[i] = a[i] * a[i] + b[i] * b[t];`
			`++t;`
[OPENMP 4.1] Add codegen for 'simdlen' clause. Add emission of metadata for simd loops in presence of 'simdlen' clause. If 'simdlen' clause is provided without 'safelen' clause, the vectorizer width for the loop is set to value of 'simdlen' clause + all read/write ops in loop are marked with '!llvm.mem.parallel_loop_access' metadata. If 'simdlen' clause is provided along with 'safelen' clause, the vectorizer width for the loop is set to value of 'simdlen' clause + all read/write ops in loop are not marked with '!llvm.mem.parallel_loop_access' metadata. If 'safelen' clause is provided without 'simdlen' clause, the vectorizer width for the loop is set to value of 'safelen' clause + all read/write ops in loop are not marked with '!llvm.mem.parallel_loop_access' metadata. llvm-svn: 245697 2015-08-21 20:19:04 +08:00			`}`
			`// do not emit parallel_loop_access metadata due to usage of safelen clause.`
			`// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}`
			`#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8)`
			`// CHECK: [[C_PTRINT:%.+]] = ptrtoint`
			`// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31`
			`// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])`
			`// CHECK: [[A_PTRINT:%.+]] = ptrtoint`

			`// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`
			`// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31`
			`// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63`
			`// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`
			`// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`

			`// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])`
			`// CHECK: [[B_PTRINT:%.+]] = ptrtoint`

			`// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15`
			`// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31`
			`// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63`
			`// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15`
			`// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31`

			`// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])`
			`for (int i = 0; i < size; ++i) {`
			`c[i] = a[i] * a[i] + b[i] * b[t];`
			`++t;`
			`}`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00			`// do not emit parallel_loop_access metadata due to usage of safelen clause.`
			`// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}`
[OPENMP 4.1] Add codegen for 'simdlen' clause. Add emission of metadata for simd loops in presence of 'simdlen' clause. If 'simdlen' clause is provided without 'safelen' clause, the vectorizer width for the loop is set to value of 'simdlen' clause + all read/write ops in loop are marked with '!llvm.mem.parallel_loop_access' metadata. If 'simdlen' clause is provided along with 'safelen' clause, the vectorizer width for the loop is set to value of 'simdlen' clause + all read/write ops in loop are not marked with '!llvm.mem.parallel_loop_access' metadata. If 'safelen' clause is provided without 'simdlen' clause, the vectorizer width for the loop is set to value of 'safelen' clause + all read/write ops in loop are not marked with '!llvm.mem.parallel_loop_access' metadata. llvm-svn: 245697 2015-08-21 20:19:04 +08:00			`#pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8)`
			`// CHECK: [[C_PTRINT:%.+]] = ptrtoint`
			`// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31`
			`// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])`
			`// CHECK: [[A_PTRINT:%.+]] = ptrtoint`

			`// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`
			`// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31`
			`// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63`
			`// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`
			`// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15`

			`// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])`
			`// CHECK: [[B_PTRINT:%.+]] = ptrtoint`

			`// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15`
			`// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31`
			`// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63`
			`// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15`
			`// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31`

			`// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0`
			`// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])`
			`for (int i = 0; i < size; ++i) {`
			`c[i] = a[i] * a[i] + b[i] * b[t];`
			`++t;`
			`// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00			`}`
			`}`

			`void h2(float c, float a, float *b, int size)`
			`{`
			`// CHECK-LABEL: define void @h2`
			`int t = 0;`
			`#pragma omp simd linear(t)`
			`for (int i = 0; i < size; ++i) {`
			`c[i] = a[i] * a[i] + b[i] * b[t];`
			`++t;`
			`// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access [[LOOP_H2_HEADER:![0-9]+]]`
			`}`
			`}`

			`void h3(float c, float a, float *b, int size)`
			`{`
			`// CHECK-LABEL: define void @h3`
			`#pragma omp simd`
			`for (int i = 0; i < size; ++i) {`
			`for (int j = 0; j < size; ++j) {`
			`c[ji] = a[i] b[j];`
			`}`
			`}`
			`// do not emit parallel_loop_access for nested loop.`
			`// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}`
			`}`

			`// Metadata for h1:`
IR: Add 'distinct' MDNodes to bitcode and assembly (clang) Update testcases for LLVM change in r225474 to make `MDNode`s explicitly distinct (when they aren't uniqued). Part of PR22111. llvm-svn: 225475 2015-01-09 06:39:28 +08:00			`// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_16:![0-9]+]], [[LOOP_VEC_ENABLE:![0-9]+]]}`
IR: Make metadata typeless in assembly, clang side Match LLVM changes from r224257. llvm-svn: 224259 2014-12-16 03:10:08 +08:00			`// CHECK: [[LOOP_WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16}`
			`// CHECK: [[LOOP_VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}`
[OPENMP 4.1] Add codegen for 'simdlen' clause. Add emission of metadata for simd loops in presence of 'simdlen' clause. If 'simdlen' clause is provided without 'safelen' clause, the vectorizer width for the loop is set to value of 'simdlen' clause + all read/write ops in loop are marked with '!llvm.mem.parallel_loop_access' metadata. If 'simdlen' clause is provided along with 'safelen' clause, the vectorizer width for the loop is set to value of 'simdlen' clause + all read/write ops in loop are not marked with '!llvm.mem.parallel_loop_access' metadata. If 'safelen' clause is provided without 'simdlen' clause, the vectorizer width for the loop is set to value of 'safelen' clause + all read/write ops in loop are not marked with '!llvm.mem.parallel_loop_access' metadata. llvm-svn: 245697 2015-08-21 20:19:04 +08:00			`// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8:![0-9]+]], [[LOOP_VEC_ENABLE]]}`
			`// CHECK: [[LOOP_WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}`
			`// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8]], [[LOOP_VEC_ENABLE]]}`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00			`//`
			`// Metadata for h2:`
IR: Add 'distinct' MDNodes to bitcode and assembly (clang) Update testcases for LLVM change in r225474 to make `MDNode`s explicitly distinct (when they aren't uniqued). Part of PR22111. llvm-svn: 225475 2015-01-09 06:39:28 +08:00			`// CHECK: [[LOOP_H2_HEADER]] = distinct !{[[LOOP_H2_HEADER]], [[LOOP_VEC_ENABLE]]}`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00			`//`
			`// Metadata for h3:`
IR: Add 'distinct' MDNodes to bitcode and assembly (clang) Update testcases for LLVM change in r225474 to make `MDNode`s explicitly distinct (when they aren't uniqued). Part of PR22111. llvm-svn: 225475 2015-01-09 06:39:28 +08:00			`// CHECK: [[LOOP_H3_HEADER:![0-9]+]] = distinct !{[[LOOP_H3_HEADER]], [[LOOP_VEC_ENABLE]]}`
This patch adds a helper class (CGLoopInfo) for marking memory instructions with llvm.mem.parallel_loop_access metadata. It also adds a simple initial version of codegen for pragma omp simd (it will change in the future to support all the clauses). Differential revision: http://reviews.llvm.org/D3644 llvm-svn: 209411 2014-05-22 16:54:05 +08:00			`//`
No results found.