forked from OSchip/llvm-project
re-commit r361928: [PowerPC] [Clang] Port SSE intrinsics to PowerPC
Port xmmintrin.h, which includes the Intel SSE intrinsics implementation, to the PowerPC platform (using Altivec). The new headers containing those implementations are located in a directory named ppc_wrappers, which has higher priority when the platform is PowerPC on Linux. They are mainly developed by Steven Munroe, with contributions from Paul Clarke, Bill Schmidt, Jinsong Ji and Zixuan Wu. Patched by: Qiu Chaofan <qiucf@cn.ibm.com> Reviewed By: Jinsong Ji Differential Revision: https://reviews.llvm.org/D62121 llvm-svn: 362190
This commit is contained in:
parent
20b80fc484
commit
fc3ed1ec50
|
@ -126,6 +126,8 @@ set(cuda_wrapper_files
|
|||
|
||||
set(ppc_wrapper_files
|
||||
ppc_wrappers/mmintrin.h
|
||||
ppc_wrappers/xmmintrin.h
|
||||
ppc_wrappers/mm_malloc.h
|
||||
)
|
||||
|
||||
set(openmp_wrapper_files
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _MM_MALLOC_H_INCLUDED
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* We can't depend on <stdlib.h> since the prototype of posix_memalign
|
||||
may not be visible. */
|
||||
#ifndef __cplusplus
|
||||
extern int posix_memalign (void **, size_t, size_t);
|
||||
#else
|
||||
extern "C" int posix_memalign (void **, size_t, size_t) throw ();
|
||||
#endif
|
||||
|
||||
/* Allocate SIZE bytes whose address is aligned to at least ALIGNMENT bytes
   (raised to the vector alignment when ALIGNMENT is smaller).
   Returns the new block, or NULL when posix_memalign reports failure; on the
   plain-malloc path the result of malloc is returned unchanged.
   The block is meant to be released with _mm_free, which calls free.
   NOTE(review): the accompanying tests match the unoptimized IR of this
   function statement by statement, so keep the statement order as is. */
static __inline void *
|
||||
_mm_malloc (size_t size, size_t alignment)
|
||||
{
|
||||
/* PowerPC64 ELF V2 ABI requires quadword alignment. */
|
||||
size_t vec_align = sizeof (__vector float);
|
||||
/* Linux GLIBC malloc alignment is at least 2 X ptr size. */
|
||||
size_t malloc_align = (sizeof (void *) + sizeof (void *));
|
||||
void *ptr;
|
||||
|
||||
/* Fast path: the request matches both what malloc is assumed to guarantee
   and what vectors need, so plain malloc is used. */
if (alignment == malloc_align && alignment == vec_align)
|
||||
return malloc (size);
|
||||
/* Never hand out memory aligned more weakly than a vector requires. */
if (alignment < vec_align)
|
||||
alignment = vec_align;
|
||||
/* ptr is only read when posix_memalign returned 0 (success); any other
   return value is mapped to NULL.
   NOTE(review): posix_memalign's own constraints on ALIGNMENT are not
   checked here; confirm against its specification. */
if (posix_memalign (&ptr, alignment, size) == 0)
|
||||
return ptr;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Release a block obtained from _mm_malloc.  Both allocation paths of
   _mm_malloc (malloc and posix_memalign) are paired with a plain free here,
   so PTR is passed to free unchanged. */
static __inline void
|
||||
_mm_free (void * ptr)
|
||||
{
|
||||
free (ptr);
|
||||
}
|
||||
|
||||
#endif /* _MM_MALLOC_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,72 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
// UNSUPPORTED: !powerpc64le-
|
||||
// The stdlib.h included by mm_malloc.h pulls in native system headers
|
||||
// such as bits/libc-header-start.h or features.h. Cross-compiling it may
|
||||
// require target headers to be installed in the build environment;
|
||||
// otherwise failures are expected. So this test targets native builds only.
|
||||
|
||||
// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
||||
|
||||
// Driver for the IR patterns below: it calls _mm_malloc (100 bytes, 16-byte
// alignment) and hands the result to _mm_free. Presumably the calls are what
// cause the two static inline helpers to be emitted into the module; the
// patterns then match the bodies of those helpers, not this function.
void __attribute__((noinline))
|
||||
test_mm_malloc() {
|
||||
char *buf = _mm_malloc(100, 16);
|
||||
_mm_free(buf);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mm_malloc
|
||||
|
||||
// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
|
||||
// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
|
||||
// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
|
||||
// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
|
||||
// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG11]]:
|
||||
// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
|
||||
// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG16]]:
|
||||
// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
|
||||
// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
|
||||
// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG12]]:
|
||||
// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
|
||||
// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG23]]:
|
||||
// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
|
||||
// CHECK: [[REG24]]:
|
||||
// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
|
||||
// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
|
||||
// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
|
||||
// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG31]]:
|
||||
// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
|
||||
// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG32]]:
|
||||
// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG19]]:
|
||||
// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: ret i8* [[REG34]]
|
||||
|
||||
// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
|
||||
// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
|
||||
// CHECK-NEXT: call void @free(i8* [[REG37]])
|
||||
// CHECK-NEXT: ret void
|
|
@ -0,0 +1,72 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
// UNSUPPORTED: !powerpc64-
|
||||
// The stdlib.h included by mm_malloc.h pulls in native system headers
|
||||
// such as bits/libc-header-start.h or features.h. Cross-compiling it may
|
||||
// require target headers to be installed in the build environment;
|
||||
// otherwise failures are expected. So this test targets native builds only.
|
||||
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
||||
|
||||
// Driver for the IR patterns below: it calls _mm_malloc (100 bytes, 16-byte
// alignment) and hands the result to _mm_free. Presumably the calls are what
// cause the two static inline helpers to be emitted into the module; the
// patterns then match the bodies of those helpers, not this function.
void __attribute__((noinline))
|
||||
test_mm_malloc() {
|
||||
char *buf = _mm_malloc(100, 16);
|
||||
_mm_free(buf);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mm_malloc
|
||||
|
||||
// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
|
||||
// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
|
||||
// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
|
||||
// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
|
||||
// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG11]]:
|
||||
// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
|
||||
// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG16]]:
|
||||
// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
|
||||
// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
|
||||
// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG12]]:
|
||||
// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
|
||||
// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG23]]:
|
||||
// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
|
||||
// CHECK: [[REG24]]:
|
||||
// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
|
||||
// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
|
||||
// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
|
||||
// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG31]]:
|
||||
// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
|
||||
// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG32]]:
|
||||
// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG19]]:
|
||||
// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: ret i8* [[REG34]]
|
||||
|
||||
// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
|
||||
// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
|
||||
// CHECK-NEXT: call void @free(i8* [[REG37]])
|
||||
// CHECK-NEXT: ret void
|
|
@ -1,12 +1,13 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
|
||||
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE
|
||||
|
||||
#include <mmintrin.h>
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,13 +0,0 @@
|
|||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -o - | FileCheck %s
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -x c++ -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
|
||||
|
||||
#include <mmintrin.h>
|
||||
// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
||||
|
||||
// CHECK: target triple = "powerpc64-
|
||||
// CHECK: !llvm.module.flags =
|
|
@ -0,0 +1,11 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-unknown-linux-gnu %s -Xclang -verify
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-unknown-linux-gnu %s -Xclang -verify -x c++
|
||||
// expected-no-diagnostics
|
||||
|
||||
// RUN: not %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr7 %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
|
||||
|
||||
#include <mmintrin.h>
|
||||
// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
|
@ -0,0 +1,22 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// Since mm_malloc.h references the native system stdlib.h, cross-compile
|
||||
// testing may cause unexpected problems. This affects xmmintrin.h and the
|
||||
// intrinsics headers that follow it. If they need to be tested when
|
||||
|
||||
// cross-compiling, please add -ffreestanding to the compiler options, as in
|
||||
// test/CodeGen/ppc-xmmintrin.c.
|
||||
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify -x c++
|
||||
// expected-no-diagnostics
|
||||
|
||||
// RUN: not %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -mcpu=pwr7 -o /dev/null 2>&1 | FileCheck %s -check-prefix=SSE-ERROR
|
||||
|
||||
// Don't include mm_malloc.h, it's system specific.
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
|
||||
// Altivec must be enabled.
|
||||
#include <xmmintrin.h>
|
||||
|
||||
// SSE-ERROR: xmmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
Loading…
Reference in New Issue