re-commit r361928: [PowerPC] [Clang] Port SSE intrinsics to PowerPC

Port xmmintrin.h which include Intel SSE intrinsics implementation to PowerPC platform (using Altivec).

The new headers containing those implemenations are located into a directory named ppc_wrappers
which has higher priority when the platform is PowerPC on Linux. They are mainly developed by Steven Munroe,
with contributions from Paul Clarke, Bill Schmidt, Jinsong Ji and Zixuan Wu.

Patched by: Qiu Chaofan <qiucf@cn.ibm.com>
Reviewed By: Jinsong Ji

Differential Revision: https://reviews.llvm.org/D62121

llvm-svn: 362190
This commit is contained in:
Zi Xuan Wu 2019-05-31 04:42:13 +00:00
parent 20b80fc484
commit fc3ed1ec50
10 changed files with 4160 additions and 17 deletions

View File

@ -126,6 +126,8 @@ set(cuda_wrapper_files
set(ppc_wrapper_files set(ppc_wrapper_files
ppc_wrappers/mmintrin.h ppc_wrappers/mmintrin.h
ppc_wrappers/xmmintrin.h
ppc_wrappers/mm_malloc.h
) )
set(openmp_wrapper_files set(openmp_wrapper_files

View File

@ -0,0 +1,48 @@
/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef _MM_MALLOC_H_INCLUDED
#define _MM_MALLOC_H_INCLUDED
#include <stdlib.h>
/* We can't depend on <stdlib.h> since the prototype of posix_memalign
may not be visible. */
#ifndef __cplusplus
extern int posix_memalign (void **, size_t, size_t);
#else
extern "C" int posix_memalign (void **, size_t, size_t) throw ();
#endif
static __inline void *
_mm_malloc (size_t size, size_t alignment)
{
/* PowerPC64 ELF V2 ABI requires quadword alignment. */
size_t vec_align = sizeof (__vector float);
/* Linux GLIBC malloc alignment is at least 2 X ptr size. */
size_t malloc_align = (sizeof (void *) + sizeof (void *));
void *ptr;
if (alignment == malloc_align && alignment == vec_align)
return malloc (size);
if (alignment < vec_align)
alignment = vec_align;
if (posix_memalign (&ptr, alignment, size) == 0)
return ptr;
else
return NULL;
}
static __inline void
_mm_free (void * ptr)
{
free (ptr);
}
#endif /* _MM_MALLOC_H_INCLUDED */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,72 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: powerpc-registered-target
// UNSUPPORTED: !powerpc64le-
// The stdlib.h included in mm_malloc.h references native system header
// like: bits/libc-header-start.h or features.h, cross-compile it may
// require installing target headers in build env, otherwise expecting
// failures. So this test will focus on native build only.
// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
#include <mm_malloc.h>
void __attribute__((noinline))
test_mm_malloc() {
char *buf = _mm_malloc(100, 16);
_mm_free(buf);
}
// CHECK-LABEL: @test_mm_malloc
// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG11]]:
// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG16]]:
// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG12]]:
// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG23]]:
// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
// CHECK: [[REG24]]:
// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG31]]:
// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG32]]:
// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG19]]:
// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
// CHECK-NEXT: ret i8* [[REG34]]
// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
// CHECK-NEXT: call void @free(i8* [[REG37]])
// CHECK-NEXT: ret void

View File

@ -0,0 +1,72 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: powerpc-registered-target
// UNSUPPORTED: !powerpc64-
// The stdlib.h included in mm_malloc.h references native system header
// like: bits/libc-header-start.h or features.h, cross-compile it may
// require installing target headers in build env, otherwise expecting
// failures. So this test will focus on native build only.
// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
#include <mm_malloc.h>
void __attribute__((noinline))
test_mm_malloc() {
char *buf = _mm_malloc(100, 16);
_mm_free(buf);
}
// CHECK-LABEL: @test_mm_malloc
// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG11]]:
// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG16]]:
// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG12]]:
// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG23]]:
// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
// CHECK: [[REG24]]:
// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG31]]:
// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG32]]:
// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
// CHECK: [[REG19]]:
// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
// CHECK-NEXT: ret i8* [[REG34]]
// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
// CHECK-NEXT: call void @free(i8* [[REG37]])
// CHECK-NEXT: ret void

View File

@ -1,12 +1,13 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: powerpc-registered-target // REQUIRES: powerpc-registered-target
// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE
// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE
// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE
// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE
#include <mmintrin.h> #include <mmintrin.h>

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +0,0 @@
// REQUIRES: powerpc-registered-target
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -o - | FileCheck %s
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -x c++ -o - | FileCheck %s
// expected-no-diagnostics
// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
#include <mmintrin.h>
// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
// CHECK: target triple = "powerpc64-
// CHECK: !llvm.module.flags =

View File

@ -0,0 +1,11 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: powerpc-registered-target
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-unknown-linux-gnu %s -Xclang -verify
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-unknown-linux-gnu %s -Xclang -verify -x c++
// expected-no-diagnostics
// RUN: not %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr7 %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
#include <mmintrin.h>
// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."

View File

@ -0,0 +1,22 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: powerpc-registered-target
// Since mm_malloc.h references system native stdlib.h, doing cross-compile
// testing may cause unexpected problems. This would affect xmmintrin.h and
// other following intrinsics headers. If there's need to test them using
// cross-compile, please add -ffreestanding to compiler options, like
// test/CodeGen/ppc-xmmintrin.c.
// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify
// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify -x c++
// expected-no-diagnostics
// RUN: not %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -mcpu=pwr7 -o /dev/null 2>&1 | FileCheck %s -check-prefix=SSE-ERROR
// Don't include mm_malloc.h, it's system specific.
#define _MM_MALLOC_H_INCLUDED
// Altivec must be enabled.
#include <xmmintrin.h>
// SSE-ERROR: xmmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."