[PowerPC] [Clang] Port MMX intrinsics and basic test cases to Power

Port mmintrin.h, which contains the implementation of the x86 MMX intrinsics, to the PowerPC platform (using Altivec).

To make header inclusion work correctly, PowerPC's toolchain class is overridden to insert a new headers directory (named ppc_wrappers) into the include path. Basic test cases for several intrinsic functions are added.

The header is mainly developed by Steven Munroe, with contributions from Paul Clarke, Bill Schmidt, Jinsong Ji and Zixuan Wu.

Reviewed By: Jinsong Ji

Differential Revision: https://reviews.llvm.org/D59924

llvm-svn: 358949
This commit is contained in:
Qiu Chaofan 2019-04-23 05:50:24 +00:00
parent 9da81421b8
commit 19828e399b
8 changed files with 1606 additions and 1 deletions

View File

@ -65,6 +65,7 @@ add_clang_library(clangDriver
ToolChains/TCE.cpp
ToolChains/WebAssembly.cpp
ToolChains/XCore.cpp
ToolChains/PPCLinux.cpp
Types.cpp
XRayArgs.cpp

View File

@ -38,6 +38,7 @@
#include "ToolChains/NetBSD.h"
#include "ToolChains/OpenBSD.h"
#include "ToolChains/PS4CPU.h"
#include "ToolChains/PPCLinux.h"
#include "ToolChains/RISCVToolchain.h"
#include "ToolChains/Solaris.h"
#include "ToolChains/TCE.h"
@ -4576,6 +4577,11 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
!Target.hasEnvironment())
TC = llvm::make_unique<toolchains::MipsLLVMToolChain>(*this, Target,
Args);
else if (Target.getArch() == llvm::Triple::ppc ||
Target.getArch() == llvm::Triple::ppc64 ||
Target.getArch() == llvm::Triple::ppc64le)
TC = llvm::make_unique<toolchains::PPCLinuxToolChain>(*this, Target,
Args);
else
TC = llvm::make_unique<toolchains::Linux>(*this, Target, Args);
break;

View File

@ -0,0 +1,31 @@
//===-- PPCLinux.cpp - PowerPC ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "PPCLinux.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/Options.h"
#include "llvm/Support/Path.h"
using namespace clang::driver::toolchains;
using namespace llvm::opt;
void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// PPC wrapper headers are implementation of x86 intrinsics on PowerPC, which
// is not supported on PPC32 platform.
if (getArch() != llvm::Triple::ppc &&
!DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) &&
!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
const Driver &D = getDriver();
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include", "ppc_wrappers");
addSystemInclude(DriverArgs, CC1Args, P);
}
Linux::AddClangSystemIncludeArgs(DriverArgs, CC1Args);
}

View File

@ -0,0 +1,33 @@
//===--- PPCLinux.h - PowerPC ToolChain Implementations ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_LINUX_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_LINUX_H
#include "Linux.h"
namespace clang {
namespace driver {
namespace toolchains {
/// Linux tool chain specialization for PowerPC targets.
///
/// Behaves like the generic Linux tool chain except for
/// AddClangSystemIncludeArgs, which is overridden here.
class LLVM_LIBRARY_VISIBILITY PPCLinuxToolChain : public Linux {
public:
/// Construct the tool chain by forwarding all arguments unchanged to the
/// Linux base-class constructor.
PPCLinuxToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args)
: Linux(D, Triple, Args) {}
/// Add system include directories to \p CC1Args based on \p DriverArgs.
/// The implementation (PPCLinux.cpp) adds the resource directory's
/// "include/ppc_wrappers" path for non-PPC32 targets when neither
/// OPT_nostdinc nor OPT_nobuiltininc is given, then calls
/// Linux::AddClangSystemIncludeArgs.
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_LINUX_H

View File

@ -122,6 +122,10 @@ set(cuda_wrapper_files
cuda_wrappers/new
)
# Headers living under ppc_wrappers/; they are copied to the output include
# directory together with the regular and CUDA wrapper headers.
set(ppc_wrapper_files
ppc_wrappers/mmintrin.h
)
set(output_dir ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include)
set(out_files)
@ -147,7 +151,7 @@ endfunction(clang_generate_header)
# Copy header files from the source directory to the build directory
foreach( f ${files} ${cuda_wrapper_files} )
foreach( f ${files} ${cuda_wrapper_files} ${ppc_wrapper_files} )
copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} ${f})
endforeach( f )

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,60 @@
// REQUIRES: powerpc-registered-target
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr8 -target powerpc64-gnu-linux %s \
// RUN: -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-BE
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr8 -target powerpc64le-gnu-linux %s \
// RUN: -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-LE
#include <mmintrin.h>
unsigned long long int ull1, ull2;
__m64 m1, m2, res;
// Calls _mm_packs_pu16, _mm_packs_pi16 and _mm_packs_pi32 in turn, each time
// on the globals ull1 and ull2 cast to __m64. Every call overwrites the global
// `res`; the function exists so that the generated IR for the three intrinsics
// can be matched by the FileCheck directives that follow it.
void __attribute__((noinline))
test_packs() {
res = _mm_packs_pu16((__m64)ull1, (__m64)ull2);
res = _mm_packs_pi16((__m64)ull1, (__m64)ull2);
res = _mm_packs_pi32((__m64)ull1, (__m64)ull2);
}
// CHECK-LABEL: @test_packs
// CHECK: define available_externally i64 @_mm_packs_pu16(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
// CHECK: store i64 [[REG1]], i64* [[REG3:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 [[REG2]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
// CHECK-LE: load i64, i64* [[REG3]], align 8
// CHECK: load i64, i64* [[REG4]], align 8
// CHECK-BE: load i64, i64* [[REG3]], align 8
// CHECK: [[REG5:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt
// CHECK-NEXT: store <8 x i16> [[REG5]], <8 x i16>* [[REG6:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG8:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG8]], align 16
// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG7]], <8 x i16> [[REG9]])
// CHECK-NEXT: store <16 x i8> [[REG10]], <16 x i8>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG6]], align 16
// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG6]], align 16
// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(bool vector[8], bool vector[8])(<8 x i16> [[REG12]], <8 x i16> [[REG13]])
// CHECK-NEXT: store <16 x i8> [[REG14]], <16 x i8>* [[REG15:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG11]], align 16
// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG15]], align 16
// CHECK-NEXT: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG16]], <16 x i8> zeroinitializer, <16 x i8> [[REG17]])
// CHECK: define available_externally i64 @_mm_packs_pi16(i64 [[REG18:[0-9a-zA-Z_%.]+]], i64 [[REG19:[0-9a-zA-Z_%.]+]])
// CHECK: store i64 [[REG18]], i64* [[REG20:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 [[REG19]], i64* [[REG21:[0-9a-zA-Z_%.]+]], align 8
// CHECK-LE: load i64, i64* [[REG20]], align 8
// CHECK: load i64, i64* [[REG21]], align 8
// CHECK-BE: load i64, i64* [[REG20]], align 8
// CHECK: [[REG22:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG23:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG23]], align 16
// CHECK-NEXT: call <16 x i8> @vec_packs(short vector[8], short vector[8])(<8 x i16> [[REG22]], <8 x i16> [[REG24]])
// CHECK: define available_externally i64 @_mm_packs_pi32(i64 [[REG25:[0-9a-zA-Z_%.]+]], i64 [[REG26:[0-9a-zA-Z_%.]+]])
// CHECK: store i64 [[REG25]], i64* [[REG27:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: store i64 [[REG26]], i64* [[REG28:[0-9a-zA-Z_%.]+]], align 8
// CHECK-LE: load i64, i64* [[REG27]], align 8
// CHECK: load i64, i64* [[REG28]], align 8
// CHECK-BE: load i64, i64* [[REG27]], align 8
// CHECK: [[REG29:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG30:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG30]], align 16
// CHECK-NEXT: call <8 x i16> @vec_packs(int vector[4], int vector[4])(<4 x i32> [[REG29]], <4 x i32> [[REG31]])

View File

@ -0,0 +1,13 @@
// REQUIRES: powerpc-registered-target
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -o - | FileCheck %s
// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -x c++ -o - | FileCheck %s
// expected-no-diagnostics
// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
#include <mmintrin.h>
// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
// CHECK: target triple = "powerpc64-
// CHECK: !llvm.module.flags =