bpf: enable sub-register code-gen for XADD

Supporting sub-register code-gen for XADD is like supporting any other load
and store patterns.

No new instruction is introduced.

  lock *(u32 *)(r1 + 0) += w2

has exactly the same underlying insn as:

  lock *(u32 *)(r1 + 0) += r2

The BPF_W width modifier guarantees that they behave the same at runtime.
This patch merely teaches the BPF back-end that the BPF_W width modifier can
work with the GPR32 register class, which is all that is needed for
sub-register code-gen support for XADD.

test/CodeGen/BPF/xadd.ll updated to include sub-register code-gen tests.

A new testcase, test/CodeGen/BPF/xadd_legal.ll, is added to make sure the
legal case passes under all code-gen modes. It also exercises the dead-def
check on GPR32: without proper handling like that inside
BPFMIChecking.cpp:hasLiveDefs, this testcase would fail.
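
For reference, a sketch of the legal/illegal distinction (function names are
illustrative, not taken from the tests; the snippet compiles as C or C++):
BPF XADD does not fetch the old value, so only an atomic add whose fetched
result is unused can be selected.

  int legal(int *ptr, unsigned long long a) {
    __sync_fetch_and_add(ptr, a); // fetched value unused: plain XADD works
    return *ptr;                  // re-load instead of consuming the fetch
  }

  int illegal(int *ptr) {
    // Old value consumed: BPF XADD cannot return it, so llc rejects this.
    // This is the pattern test/CodeGen/BPF/xadd.ll guards against.
    return __sync_fetch_and_add(ptr, 1);
  }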

Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
llvm-svn: 355126

--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -614,11 +614,31 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
   let BPFClass = BPF_STX;
 }
 
+class XADD32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+    : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value,
+                 (outs GPR32:$dst),
+                 (ins MEMri:$addr, GPR32:$val),
+                 "lock *("#OpcodeStr#" *)($addr) += $val",
+                 [(set GPR32:$dst, (OpNode ADDRri:$addr, GPR32:$val))]> {
+  bits<4> dst;
+  bits<20> addr;
+
+  let Inst{51-48} = addr{19-16}; // base reg
+  let Inst{55-52} = dst;
+  let Inst{47-32} = addr{15-0}; // offset
+  let BPFClass = BPF_STX;
+}
+
 let Constraints = "$dst = $val" in {
-  def XADD32 : XADD<BPF_W, "u32", atomic_load_add_32>;
-  def XADD64 : XADD<BPF_DW, "u64", atomic_load_add_64>;
-  // undefined def XADD16 : XADD<1, "xadd16", atomic_load_add_16>;
-  // undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>;
+  let Predicates = [BPFNoALU32] in {
+    def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>;
+  }
+
+  let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
+    def XADDW32 : XADD32<BPF_W, "u32", atomic_load_add_32>;
+  }
+
+  def XADDD : XADD<BPF_DW, "u64", atomic_load_add_64>;
 }
 
 // bswap16, bswap32, bswap64

--- a/lib/Target/BPF/BPFMIChecking.cpp
+++ b/lib/Target/BPF/BPFMIChecking.cpp
@@ -155,7 +155,9 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
 void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
   for (MachineBasicBlock &MBB : *MF) {
     for (MachineInstr &MI : MBB) {
-      if (MI.getOpcode() != BPF::XADD32 && MI.getOpcode() != BPF::XADD64)
+      if (MI.getOpcode() != BPF::XADDW &&
+          MI.getOpcode() != BPF::XADDD &&
+          MI.getOpcode() != BPF::XADDW32)
         continue;
 
       LLVM_DEBUG(MI.dump());
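
For orientation, a much-simplified sketch of what a hasLiveDefs-style check
does (hasLiveDefsSketch is a hypothetical name; the real logic in
BPFMIChecking.cpp additionally walks overlapping sub- and super-registers
through TargetRegisterInfo, which is exactly the part the GPR32 sub-register
case stresses):

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"
  using namespace llvm;

  // Returns true if the XADD def (the would-be fetched value) may still be
  // read by a later instruction, something BPF XADD cannot provide.
  static bool hasLiveDefsSketch(const MachineInstr &MI,
                                const TargetRegisterInfo *TRI) {
    (void)TRI; // the real check uses TRI to walk overlapping register units
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg() || !MO.isDef())
        continue;
      if (!MO.isDead()) // no dead flag: the def may have a later reader
        return true;
    }
    return false;
  }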

--- a/test/CodeGen/BPF/xadd.ll
+++ b/test/CodeGen/BPF/xadd.ll
@@ -1,5 +1,7 @@
 ; RUN: not llc -march=bpfel < %s 2>&1 | FileCheck %s
 ; RUN: not llc -march=bpfeb < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck %s
 ; This file is generated with the source command and source
 ; $ clang -target bpf -O2 -g -S -emit-llvm t.c

--- /dev/null
+++ b/test/CodeGen/BPF/xadd_legal.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=bpfel < %s 2>&1 | FileCheck --check-prefix=CHECK-64 %s
+; RUN: llc -march=bpfeb < %s 2>&1 | FileCheck --check-prefix=CHECK-64 %s
+; RUN: llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck --check-prefix=CHECK-32 %s
+; RUN: llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck --check-prefix=CHECK-32 %s
+; This file is generated with the source command and source
+; $ clang -target bpf -O2 -S -emit-llvm t.c
+; $ cat t.c
+; int test(int *ptr, unsigned long long a) {
+;   __sync_fetch_and_add(ptr, a);
+;   return *ptr;
+; }
+;
+; NOTE: passing unsigned long long as the second operand of __sync_fetch_and_add
+; could effectively create a sub-register reference coming from indexing a full
+; register, which could then exercise hasLiveDefs inside BPFMIChecking.cpp.
+
+define dso_local i32 @test(i32* nocapture %ptr, i64 %a) {
+entry:
+  %conv = trunc i64 %a to i32
+  %0 = atomicrmw add i32* %ptr, i32 %conv seq_cst
+; CHECK-64: lock *(u32 *)(r1 + 0) += r2
+; CHECK-32: lock *(u32 *)(r1 + 0) += w2
+  %1 = load i32, i32* %ptr, align 4
+  ret i32 %1
+}