[PowerPC] Added multiple PowerPC builtins

This is the first in a series of patches to provide builtins for
compatibility with the XL compiler. Most of the builtins already had
intrinsics and only needed to be implemented in the front end.
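New intrinsics were created for the three iospace builtins (iospace_eieio,
iospace_lwsync, iospace_sync) and for icbt; eieio already had one. A pseudo
instruction (PseudoEIEIO) was created and is selected for both eieio and
iospace_eieio to ensure that two nops are emitted before the eieio instruction.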

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D102443
This commit is contained in:
Quinn Pham 2021-05-27 13:35:12 -05:00 committed by Stefan Pintilie
parent 109aac9212
commit 62b5df7fe2
10 changed files with 459 additions and 13 deletions

View File

@ -29,6 +29,23 @@
#define UNALIASED_CUSTOM_BUILTIN(ID, TYPES, ACCUMULATE) \
CUSTOM_BUILTIN(ID, ID, TYPES, ACCUMULATE)
// builtins for compatibility with the XL compiler
// Every entry in this group also has an XL-style spelling without the
// __builtin_ppc_ prefix (for example __popcntb), which
// PPCTargetInfo::defineXLCompatMacros defines as a macro that expands to the
// builtin name listed here.
BUILTIN(__builtin_ppc_popcntb, "ULiULi", "")
BUILTIN(__builtin_ppc_eieio, "v", "")
BUILTIN(__builtin_ppc_iospace_eieio, "v", "")
BUILTIN(__builtin_ppc_isync, "v", "")
BUILTIN(__builtin_ppc_lwsync, "v", "")
BUILTIN(__builtin_ppc_iospace_lwsync, "v", "")
BUILTIN(__builtin_ppc_sync, "v", "")
BUILTIN(__builtin_ppc_iospace_sync, "v", "")
BUILTIN(__builtin_ppc_dcbfl, "vvC*", "")
BUILTIN(__builtin_ppc_dcbflp, "vvC*", "")
BUILTIN(__builtin_ppc_dcbst, "vvC*", "")
BUILTIN(__builtin_ppc_dcbt, "vv*", "")
BUILTIN(__builtin_ppc_dcbtst, "vv*", "")
BUILTIN(__builtin_ppc_dcbz, "vv*", "")
BUILTIN(__builtin_ppc_icbt, "vv*", "")
BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n")
// This is just a placeholder, the types and attributes are wrong.

View File

@ -85,6 +85,9 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
/// #defines that are not tied to a specific subtarget.
void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
defineXLCompatMacros(Builder);
// Target identification.
Builder.defineMacro("__ppc__");
Builder.defineMacro("__PPC__");

View File

@ -350,6 +350,24 @@ public:
/// Returns true when \p RegName is exactly "r1" or "x1".
bool isSPRegName(StringRef RegName) const override {
  const bool IsR1 = RegName == "r1";
  return IsR1 || RegName == "x1";
}
/// Defines every XL-style builtin spelling (for example __popcntb) as a macro
/// whose value is the name of the matching __builtin_ppc_* builtin.
void defineXLCompatMacros(MacroBuilder &Builder) const {
  // Each row is {XL spelling, builtin name}; the macros are defined in row
  // order.
  static const char *const XLCompatAliases[][2] = {
      {"__popcntb", "__builtin_ppc_popcntb"},
      {"__eieio", "__builtin_ppc_eieio"},
      {"__iospace_eieio", "__builtin_ppc_iospace_eieio"},
      {"__isync", "__builtin_ppc_isync"},
      {"__lwsync", "__builtin_ppc_lwsync"},
      {"__iospace_lwsync", "__builtin_ppc_iospace_lwsync"},
      {"__sync", "__builtin_ppc_sync"},
      {"__iospace_sync", "__builtin_ppc_iospace_sync"},
      {"__dcbfl", "__builtin_ppc_dcbfl"},
      {"__dcbflp", "__builtin_ppc_dcbflp"},
      {"__dcbst", "__builtin_ppc_dcbst"},
      {"__dcbt", "__builtin_ppc_dcbt"},
      {"__dcbtst", "__builtin_ppc_dcbtst"},
      {"__dcbz", "__builtin_ppc_dcbz"},
      {"__icbt", "__builtin_ppc_icbt"},
  };
  for (const auto &Alias : XLCompatAliases)
    Builder.defineMacro(Alias[0], Alias[1]);
}
};
class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo {

View File

@ -0,0 +1,260 @@
// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \
// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \
// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s
// Expects a call through the XL-style name __popcntb to be emitted as a call
// to @llvm.ppc.popcntb. `a` is never initialized; only the generated call is
// inspected.
void test_popcntb() {
// CHECK-LABEL: @test_popcntb(
// CHECK-NEXT: entry:
unsigned long a;
unsigned long b = __popcntb(a);
// CHECK: %1 = call i64 @llvm.ppc.popcntb(i64 %0)
}
// Expects a call through the XL-style name __eieio to be emitted as a call to
// @llvm.ppc.eieio.
void test_eieio() {
// CHECK-LABEL: @test_eieio(
// CHECK-NEXT: entry:
__eieio();
// CHECK: call void @llvm.ppc.eieio()
}
// Expects a call through the XL-style name __iospace_eieio to be emitted as a
// call to @llvm.ppc.iospace.eieio.
void test_iospace_eieio() {
// CHECK-LABEL: @test_iospace_eieio(
// CHECK-NEXT: entry:
__iospace_eieio();
// CHECK: call void @llvm.ppc.iospace.eieio()
}
// Expects a call through the XL-style name __isync to be emitted as a call to
// @llvm.ppc.isync.
void test_isync() {
// CHECK-LABEL: @test_isync(
// CHECK-NEXT: entry:
__isync();
// CHECK: call void @llvm.ppc.isync()
}
// Expects a call through the XL-style name __lwsync to be emitted as a call to
// @llvm.ppc.lwsync.
void test_lwsync() {
// CHECK-LABEL: @test_lwsync(
// CHECK-NEXT: entry:
__lwsync();
// CHECK: call void @llvm.ppc.lwsync()
}
// Expects a call through the XL-style name __iospace_lwsync to be emitted as a
// call to @llvm.ppc.iospace.lwsync.
void test_iospace_lwsync() {
// CHECK-LABEL: @test_iospace_lwsync(
// CHECK-NEXT: entry:
__iospace_lwsync();
// CHECK: call void @llvm.ppc.iospace.lwsync()
}
// Expects a call through the XL-style name __sync to be emitted as a call to
// @llvm.ppc.sync.
void test_sync() {
// CHECK-LABEL: @test_sync(
// CHECK-NEXT: entry:
__sync();
// CHECK: call void @llvm.ppc.sync()
}
// Expects a call through the XL-style name __iospace_sync to be emitted as a
// call to @llvm.ppc.iospace.sync.
void test_iospace_sync() {
// CHECK-LABEL: @test_iospace_sync(
// CHECK-NEXT: entry:
__iospace_sync();
// CHECK: call void @llvm.ppc.iospace.sync()
}
// Expects a call through the XL-style name __dcbfl with a const void* argument
// to be emitted as a call to @llvm.ppc.dcbfl. `a` is never initialized; only
// the generated call is inspected.
void test_dcbfl() {
// CHECK-LABEL: @test_dcbfl(
// CHECK-NEXT: entry:
const void* a;
__dcbfl(a);
// CHECK: call void @llvm.ppc.dcbfl(i8* %0)
}
// Expects a call through the XL-style name __dcbflp with a const void*
// argument to be emitted as a call to @llvm.ppc.dcbflp. `a` is never
// initialized; only the generated call is inspected.
void test_dcbflp() {
// CHECK-LABEL: @test_dcbflp(
// CHECK-NEXT: entry:
const void* a;
__dcbflp(a);
// CHECK: call void @llvm.ppc.dcbflp(i8* %0)
}
// Expects a call through the XL-style name __dcbst with a const void* argument
// to be emitted as a call to @llvm.ppc.dcbst. `a` is never initialized; only
// the generated call is inspected.
void test_dcbst() {
// CHECK-LABEL: @test_dcbst(
// CHECK-NEXT: entry:
const void* a;
__dcbst(a);
// CHECK: call void @llvm.ppc.dcbst(i8* %0)
}
// Expects a call through the XL-style name __dcbt with a void* argument to be
// emitted as a call to @llvm.ppc.dcbt. `a` is never initialized; only the
// generated call is inspected.
void test_dcbt() {
// CHECK-LABEL: @test_dcbt(
// CHECK-NEXT: entry:
void* a;
__dcbt(a);
// CHECK: call void @llvm.ppc.dcbt(i8* %0)
}
// Expects a call through the XL-style name __dcbtst with a void* argument to
// be emitted as a call to @llvm.ppc.dcbtst. `a` is never initialized; only the
// generated call is inspected.
void test_dcbtst() {
// CHECK-LABEL: @test_dcbtst(
// CHECK-NEXT: entry:
void* a;
__dcbtst(a);
// CHECK: call void @llvm.ppc.dcbtst(i8* %0)
}
// Expects a call through the XL-style name __dcbz with a void* argument to be
// emitted as a call to @llvm.ppc.dcbz. `a` is never initialized; only the
// generated call is inspected.
void test_dcbz() {
// CHECK-LABEL: @test_dcbz(
// CHECK-NEXT: entry:
void* a;
__dcbz(a);
// CHECK: call void @llvm.ppc.dcbz(i8* %0)
}
// Expects a call through the XL-style name __icbt with a void* argument to be
// emitted as a call to @llvm.ppc.icbt. `a` is never initialized; only the
// generated call is inspected.
void test_icbt() {
// CHECK-LABEL: @test_icbt(
// CHECK-NEXT: entry:
void* a;
__icbt(a);
// CHECK: call void @llvm.ppc.icbt(i8* %0)
}
// Same expectation as test_popcntb, but calls __builtin_ppc_popcntb by its
// full builtin name. `a` is never initialized; only the generated call is
// inspected.
void test_builtin_ppc_popcntb() {
// CHECK-LABEL: @test_builtin_ppc_popcntb(
// CHECK-NEXT: entry:
unsigned long a;
unsigned long b = __builtin_ppc_popcntb(a);
// CHECK: %1 = call i64 @llvm.ppc.popcntb(i64 %0)
}
// Expects a direct call to __builtin_ppc_eieio to be emitted as a call to
// @llvm.ppc.eieio.
void test_builtin_ppc_eieio() {
// CHECK-LABEL: @test_builtin_ppc_eieio(
// CHECK-NEXT: entry:
__builtin_ppc_eieio();
// CHECK: call void @llvm.ppc.eieio()
}
// Expects a direct call to __builtin_ppc_iospace_eieio to be emitted as a call
// to @llvm.ppc.iospace.eieio.
void test_builtin_ppc_iospace_eieio() {
// CHECK-LABEL: @test_builtin_ppc_iospace_eieio(
// CHECK-NEXT: entry:
__builtin_ppc_iospace_eieio();
// CHECK: call void @llvm.ppc.iospace.eieio()
}
// Expects a direct call to __builtin_ppc_isync to be emitted as a call to
// @llvm.ppc.isync.
void test_builtin_ppc_isync() {
// CHECK-LABEL: @test_builtin_ppc_isync(
// CHECK-NEXT: entry:
__builtin_ppc_isync();
// CHECK: call void @llvm.ppc.isync()
}
// Expects a direct call to __builtin_ppc_lwsync to be emitted as a call to
// @llvm.ppc.lwsync.
void test_builtin_ppc_lwsync() {
// CHECK-LABEL: @test_builtin_ppc_lwsync(
// CHECK-NEXT: entry:
__builtin_ppc_lwsync();
// CHECK: call void @llvm.ppc.lwsync()
}
// Expects a direct call to __builtin_ppc_iospace_lwsync to be emitted as a
// call to @llvm.ppc.iospace.lwsync.
void test_builtin_ppc_iospace_lwsync() {
// CHECK-LABEL: @test_builtin_ppc_iospace_lwsync(
// CHECK-NEXT: entry:
__builtin_ppc_iospace_lwsync();
// CHECK: call void @llvm.ppc.iospace.lwsync()
}
// Expects a direct call to __builtin_ppc_sync to be emitted as a call to
// @llvm.ppc.sync.
void test_builtin_ppc_sync() {
// CHECK-LABEL: @test_builtin_ppc_sync(
// CHECK-NEXT: entry:
__builtin_ppc_sync();
// CHECK: call void @llvm.ppc.sync()
}
// Expects a direct call to __builtin_ppc_iospace_sync to be emitted as a call
// to @llvm.ppc.iospace.sync.
void test_builtin_ppc_iospace_sync() {
// CHECK-LABEL: @test_builtin_ppc_iospace_sync(
// CHECK-NEXT: entry:
__builtin_ppc_iospace_sync();
// CHECK: call void @llvm.ppc.iospace.sync()
}
// Expects a direct call to __builtin_ppc_dcbfl with a const void* argument to
// be emitted as a call to @llvm.ppc.dcbfl. `a` is never initialized; only the
// generated call is inspected.
void test_builtin_ppc_dcbfl() {
// CHECK-LABEL: @test_builtin_ppc_dcbfl(
// CHECK-NEXT: entry:
const void* a;
__builtin_ppc_dcbfl(a);
// CHECK: call void @llvm.ppc.dcbfl(i8* %0)
}
// Expects a direct call to __builtin_ppc_dcbflp with a const void* argument to
// be emitted as a call to @llvm.ppc.dcbflp. `a` is never initialized; only the
// generated call is inspected.
void test_builtin_ppc_dcbflp() {
// CHECK-LABEL: @test_builtin_ppc_dcbflp(
// CHECK-NEXT: entry:
const void* a;
__builtin_ppc_dcbflp(a);
// CHECK: call void @llvm.ppc.dcbflp(i8* %0)
}
// Expects a direct call to __builtin_ppc_dcbst with a const void* argument to
// be emitted as a call to @llvm.ppc.dcbst. `a` is never initialized; only the
// generated call is inspected.
void test_builtin_ppc_dcbst() {
// CHECK-LABEL: @test_builtin_ppc_dcbst(
// CHECK-NEXT: entry:
const void* a;
__builtin_ppc_dcbst(a);
// CHECK: call void @llvm.ppc.dcbst(i8* %0)
}
// Expects a direct call to __builtin_ppc_dcbt with a void* argument to be
// emitted as a call to @llvm.ppc.dcbt. `a` is never initialized; only the
// generated call is inspected.
void test_builtin_ppc_dcbt() {
// CHECK-LABEL: @test_builtin_ppc_dcbt(
// CHECK-NEXT: entry:
void* a;
__builtin_ppc_dcbt(a);
// CHECK: call void @llvm.ppc.dcbt(i8* %0)
}
// Expects a direct call to __builtin_ppc_dcbtst with a void* argument to be
// emitted as a call to @llvm.ppc.dcbtst. `a` is never initialized; only the
// generated call is inspected.
void test_builtin_ppc_dcbtst() {
// CHECK-LABEL: @test_builtin_ppc_dcbtst(
// CHECK-NEXT: entry:
void* a;
__builtin_ppc_dcbtst(a);
// CHECK: call void @llvm.ppc.dcbtst(i8* %0)
}
// Expects a direct call to __builtin_ppc_dcbz with a void* argument to be
// emitted as a call to @llvm.ppc.dcbz. `a` is never initialized; only the
// generated call is inspected.
void test_builtin_ppc_dcbz() {
// CHECK-LABEL: @test_builtin_ppc_dcbz(
// CHECK-NEXT: entry:
void* a;
__builtin_ppc_dcbz(a);
// CHECK: call void @llvm.ppc.dcbz(i8* %0)
}
// Expects a direct call to __builtin_ppc_icbt with a void* argument to be
// emitted as a call to @llvm.ppc.icbt. `a` is never initialized; only the
// generated call is inspected.
void test_builtin_ppc_icbt() {
// CHECK-LABEL: @test_builtin_ppc_icbt(
// CHECK-NEXT: entry:
void* a;
__builtin_ppc_icbt(a);
// CHECK: call void @llvm.ppc.icbt(i8* %0)
}

View File

@ -20,34 +20,53 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">,
Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbfl : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbflp : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbfl : GCCBuiltin<"__builtin_ppc_dcbfl">,
Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbflp : GCCBuiltin<"__builtin_ppc_dcbflp">,
Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbfps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbstps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty],
def int_ppc_dcbst : GCCBuiltin<"__builtin_ppc_dcbst">,
Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbt : GCCBuiltin<"__builtin_ppc_dcbt">,
Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty],
def int_ppc_dcbtst : GCCBuiltin<"__builtin_ppc_dcbtst">,
Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_ppc_dcbt_with_hint: Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_ppc_dcbtst_with_hint: Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbz : GCCBuiltin<"__builtin_ppc_dcbz">,
Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_icbt : GCCBuiltin<"__builtin_ppc_icbt">,
Intrinsic<[], [llvm_ptr_ty], []>;
// Population Count in each Byte.
def int_ppc_popcntb : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_ppc_popcntb : GCCBuiltin<"__builtin_ppc_popcntb">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
// sync instruction (i.e. sync 0, a.k.a hwsync)
def int_ppc_sync : Intrinsic<[], [], []>;
def int_ppc_sync : GCCBuiltin<"__builtin_ppc_sync">,
Intrinsic<[], [], []>;
def int_ppc_iospace_sync : GCCBuiltin<"__builtin_ppc_iospace_sync">,
Intrinsic<[], [], []>;
// isync instruction
def int_ppc_isync : Intrinsic<[], [], []>;
def int_ppc_isync : GCCBuiltin<"__builtin_ppc_isync">,
Intrinsic<[], [], []>;
// lwsync is sync 1
def int_ppc_lwsync : Intrinsic<[], [], []>;
def int_ppc_lwsync : GCCBuiltin<"__builtin_ppc_lwsync">,
Intrinsic<[], [], []>;
def int_ppc_iospace_lwsync : GCCBuiltin<"__builtin_ppc_iospace_lwsync">,
Intrinsic<[], [], []>;
// eieio instruction
def int_ppc_eieio : Intrinsic<[],[],[]>;
def int_ppc_eieio : GCCBuiltin<"__builtin_ppc_eieio">,
Intrinsic<[],[],[]>;
def int_ppc_iospace_eieio : GCCBuiltin<"__builtin_ppc_iospace_eieio">,
Intrinsic<[],[],[]>;
// Get content from current FPSCR register
def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">,

View File

@ -1360,6 +1360,16 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Now process the instruction normally.
break;
}
case PPC::PseudoEIEIO: {
  // Expand the pseudo into `ori 2, 2, 0` emitted twice, followed by the real
  // eieio instruction. Nothing further is lowered for this MI, hence the
  // return instead of a break.
  const MCInst OriX2 =
      MCInstBuilder(PPC::ORI).addReg(PPC::X2).addReg(PPC::X2).addImm(0);
  EmitToStreamer(*OutStreamer, OriX2);
  EmitToStreamer(*OutStreamer, OriX2);
  EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO));
  return;
}
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);

View File

@ -2021,6 +2021,8 @@ def : Pat<(int_ppc_dcbtst xoaddr:$dst),
(DCBTST 0, xoaddr:$dst)>;
def : Pat<(int_ppc_dcbf xoaddr:$dst),
(DCBF 0, xoaddr:$dst)>;
def : Pat<(int_ppc_icbt xoaddr:$dst),
(ICBT 0, xoaddr:$dst)>;
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
(DCBT 0, xoaddr:$dst)>; // data prefetch for loads
@ -2542,11 +2544,19 @@ let isCodeGenOnly = 1 in {
def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
"eieio", IIC_LdStLoad, []>;
def PseudoEIEIO : PPCEmitTimePseudo<(outs), (ins), "#PPCEIEIO",
[(int_ppc_eieio)]>;
def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_iospace_sync), (SYNC 0)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_iospace_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
def : Pat<(int_ppc_iospace_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
def : Pat<(int_ppc_eieio), (EnforceIEIO)>;
def : Pat<(int_ppc_iospace_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
def : Pat<(int_ppc_eieio), (PseudoEIEIO)>;
def : Pat<(int_ppc_iospace_eieio), (PseudoEIEIO)>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.

View File

@ -0,0 +1,33 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpcle-unknown-linux-gnu \
; RUN: -mattr=+msync -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
; RUN: -mattr=+msync -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mattr=+msync -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mattr=+msync -mcpu=pwr8 < %s | FileCheck %s
; Calls llvm.ppc.iospace.lwsync. Every llc invocation of this test passes
; -mattr=+msync, and the expected instruction is msync.
define dso_local void @test_builtin_ppc_iospace_lwsync() #0 {
; CHECK-LABEL: test_builtin_ppc_iospace_lwsync
entry:
call void @llvm.ppc.iospace.lwsync()
; CHECK: msync
ret void
}
declare void @llvm.ppc.iospace.lwsync() #2
; Calls llvm.ppc.iospace.sync. Every llc invocation of this test passes
; -mattr=+msync, and the expected instruction is msync.
define dso_local void @test_builtin_ppc_iospace_sync() #0 {
; CHECK-LABEL: test_builtin_ppc_iospace_sync
entry:
call void @llvm.ppc.iospace.sync()
; CHECK: msync
ret void
}
declare void @llvm.ppc.iospace.sync() #2

View File

@ -0,0 +1,74 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpcle-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
; Calls llvm.ppc.eieio and expects two consecutive `ori 2, 2, 0` instructions
; immediately followed by eieio.
define dso_local void @test_builtin_ppc_eieio() #0 {
; CHECK-LABEL: test_builtin_ppc_eieio
entry:
call void @llvm.ppc.eieio()
; CHECK: ori 2, 2, 0
; CHECK-NEXT: ori 2, 2, 0
; CHECK-NEXT: eieio
ret void
}
declare void @llvm.ppc.eieio() #2
; Calls llvm.ppc.iospace.eieio and expects the same sequence as for
; llvm.ppc.eieio, namely two consecutive `ori 2, 2, 0` instructions
; immediately followed by eieio.
define dso_local void @test_builtin_ppc_iospace_eieio() #0 {
; CHECK-LABEL: test_builtin_ppc_iospace_eieio
entry:
call void @llvm.ppc.iospace.eieio()
; CHECK: ori 2, 2, 0
; CHECK-NEXT: ori 2, 2, 0
; CHECK-NEXT: eieio
ret void
}
declare void @llvm.ppc.iospace.eieio() #2
; Calls llvm.ppc.iospace.lwsync and expects an lwsync instruction (the llc
; invocations of this test do not enable msync).
define dso_local void @test_builtin_ppc_iospace_lwsync() #0 {
; CHECK-LABEL: test_builtin_ppc_iospace_lwsync
entry:
call void @llvm.ppc.iospace.lwsync()
; CHECK: lwsync
ret void
}
declare void @llvm.ppc.iospace.lwsync() #2
; Calls llvm.ppc.iospace.sync and expects a sync instruction (the llc
; invocations of this test do not enable msync).
define dso_local void @test_builtin_ppc_iospace_sync() #0 {
; CHECK-LABEL: test_builtin_ppc_iospace_sync
entry:
call void @llvm.ppc.iospace.sync()
; CHECK: sync
ret void
}
declare void @llvm.ppc.iospace.sync() #2
; Loads a pointer from the stack slot %a (which is never stored to) and passes
; it to llvm.ppc.icbt; expects an `icbt 0, 0, 3` instruction.
define dso_local void @test_builtin_ppc_icbt() #0 {
; CHECK-LABEL: test_builtin_ppc_icbt
entry:
%a = alloca i8*, align 8
%0 = load i8*, i8** %a, align 8
call void @llvm.ppc.icbt(i8* %0)
; CHECK: icbt 0, 0, 3
ret void
}
declare void @llvm.ppc.icbt(i8*) #2

View File

@ -4,7 +4,9 @@
define void @eieio_test() {
; CHECK-LABEL: @eieio_test
; CHECK: eieio
; CHECK: ori r2, r2, 0
; CHECK-NEXT: ori r2, r2, 0
; CHECK-NEXT: eieio
; CHECK-NEXT: blr
entry: