forked from OSchip/llvm-project
[PowerPC] [Clang] Port SSE intrinsics to PowerPC
Port xmmintrin.h, which includes the Intel SSE intrinsics implementation, to the PowerPC platform (using Altivec). The new headers containing those implementations are located in a directory named ppc_wrappers, which has higher priority when the platform is PowerPC on Linux. They were mainly developed by Steven Munroe, with contributions from Paul Clarke, Bill Schmidt, Jinsong Ji and Zixuan Wu. Patched by: Qiu Chaofan <qiucf@cn.ibm.com> Reviewed By: Jinsong Ji Differential Revision: https://reviews.llvm.org/D62121 llvm-svn: 361928
This commit is contained in:
parent
c77aff7e17
commit
b3bcbb5b66
|
@ -126,6 +126,8 @@ set(cuda_wrapper_files
|
|||
|
||||
set(ppc_wrapper_files
|
||||
ppc_wrappers/mmintrin.h
|
||||
ppc_wrappers/xmmintrin.h
|
||||
ppc_wrappers/mm_malloc.h
|
||||
)
|
||||
|
||||
set(openmp_wrapper_files
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _MM_MALLOC_H_INCLUDED
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* We can't depend on <stdlib.h> since the prototype of posix_memalign
|
||||
may not be visible. */
|
||||
#ifndef __cplusplus
|
||||
extern int posix_memalign (void **, size_t, size_t);
|
||||
#else
|
||||
extern "C" int posix_memalign (void **, size_t, size_t) throw ();
|
||||
#endif
|
||||
|
||||
/* Allocate SIZE bytes at an address aligned to at least ALIGNMENT bytes.

   ALIGNMENT is raised to sizeof (__vector float) when it is smaller than
   that.  Returns the allocated pointer, or NULL when posix_memalign
   reports an error (any non-zero return).  When ALIGNMENT equals both the
   assumed malloc alignment and the vector alignment, the request is
   forwarded to plain malloc and its result is returned unchanged.

   NOTE(review): the lit test added in this change checks the unoptimized
   IR of this function statement by statement; reordering the statements
   below presumably requires regenerating those assertions.  */
static __inline void *
|
||||
_mm_malloc (size_t size, size_t alignment)
|
||||
{
|
||||
/* PowerPC64 ELF V2 ABI requires quadword alignment. */
|
||||
size_t vec_align = sizeof (__vector float);
|
||||
/* Linux GLIBC malloc alignment is at least 2 X ptr size. */
|
||||
size_t malloc_align = (sizeof (void *) + sizeof (void *));
|
||||
void *ptr;
|
||||
|
||||
/* Fast path: malloc is assumed to already provide the requested alignment,
   and that alignment also satisfies the vector requirement.  */
if (alignment == malloc_align && alignment == vec_align)
|
||||
return malloc (size);
|
||||
/* Never hand out memory aligned below the vector alignment.  */
if (alignment < vec_align)
|
||||
alignment = vec_align;
|
||||
/* posix_memalign returns 0 on success and stores the block in ptr.  */
if (posix_memalign (&ptr, alignment, size) == 0)
|
||||
return ptr;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Release PTR by passing it directly to free ().  Both allocation paths
   used by _mm_malloc in this header (malloc and posix_memalign) return
   pointers that free () accepts.  */
static __inline void
|
||||
_mm_free (void * ptr)
|
||||
{
|
||||
free (ptr);
|
||||
}
|
||||
|
||||
#endif /* _MM_MALLOC_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,71 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// The stdlib.h included by mm_malloc.h pulls in native system headers such
|
||||
// as bits/libc-header-start.h or features.h. Cross-compiling it may require
|
||||
// target headers to be installed in the build environment; otherwise expect
|
||||
// failures. This test therefore targets native builds only.
|
||||
|
||||
// RUN: %clang -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
||||
// Test driver: calls _mm_malloc with size 100 and alignment 16, then hands
// the result to _mm_free. The FileCheck assertions that follow this function
// match the IR definitions of both wrappers; presumably these calls are what
// cause the two static inline functions to be emitted at all.
void __attribute__((noinline))
|
||||
test_mm_malloc() {
|
||||
char *buf = _mm_malloc(100, 16);
|
||||
_mm_free(buf);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mm_malloc
|
||||
|
||||
// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
|
||||
// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
|
||||
// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
|
||||
// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
|
||||
// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG11]]:
|
||||
// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
|
||||
// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG16]]:
|
||||
// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
|
||||
// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
|
||||
// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG12]]:
|
||||
// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
|
||||
// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG23]]:
|
||||
// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
|
||||
// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
|
||||
// CHECK: [[REG24]]:
|
||||
// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
|
||||
// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
|
||||
// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
|
||||
// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
|
||||
// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG31]]:
|
||||
// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
|
||||
// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG32]]:
|
||||
// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
|
||||
// CHECK: [[REG19]]:
|
||||
// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
|
||||
// CHECK-NEXT: ret i8* [[REG34]]
|
||||
|
||||
// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
|
||||
// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
|
||||
// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
|
||||
// CHECK-NEXT: call void @free(i8* [[REG37]])
|
||||
// CHECK-NEXT: ret void
|
|
@ -1,3 +1,4 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,13 +0,0 @@
|
|||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -o - | FileCheck %s
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -x c++ -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
|
||||
|
||||
#include <mmintrin.h>
|
||||
// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
||||
|
||||
// CHECK: target triple = "powerpc64-
|
||||
// CHECK: !llvm.module.flags =
|
|
@ -0,0 +1,11 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-gnu-linux %s -Xclang -verify
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-gnu-linux %s -Xclang -verify -x c++
|
||||
// expected-no-diagnostics
|
||||
|
||||
// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr7 %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
|
||||
|
||||
#include <mmintrin.h>
|
||||
// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
|
@ -0,0 +1,20 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// Since mm_malloc.h references system native stdlib.h, doing cross-compile
|
||||
// testing may cause unexpected problems. This would affect xmmintrin.h and
|
||||
// other following intrinsics headers. If there's need to test them using
|
||||
// cross-compile, please add -ffreestanding to compiler options, like
|
||||
// test/CodeGen/ppc-xmmintrin.c.
|
||||
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify
|
||||
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify -x c++
|
||||
// expected-no-diagnostics
|
||||
|
||||
// RUN: not %clang -S -emit-llvm %s -mcpu=pwr7 -o /dev/null 2>&1 | FileCheck %s -check-prefix=SSE-ERROR
|
||||
|
||||
// Altivec must be enabled.
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include <mm_malloc.h>
|
||||
// SSE-ERROR: xmmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
Loading…
Reference in New Issue