From 1ff5f29fb55672a660045ed23289a12ca8167a8f Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 26 Mar 2014 14:39:31 +0000 Subject: [PATCH] ARM: add intrinsics for the v8 ldaex/stlex We've already got versions without the barriers, so this just adds IR-level support for generating the new v8 ones. rdar://problem/16227836 llvm-svn: 204813 --- llvm/include/llvm/IR/IntrinsicsARM.td | 8 +++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 15 +++-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 ++ llvm/lib/Target/ARM/ARMInstrInfo.td | 63 +++++++++++++++--- llvm/lib/Target/ARM/ARMInstrThumb2.td | 42 ++++++++---- llvm/test/CodeGen/ARM/ldaex-stlex.ll | 88 +++++++++++++++++++++++++ 6 files changed, 194 insertions(+), 26 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/ldaex-stlex.ll diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index e86ee1257084..f70dbabd171f 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -38,12 +38,20 @@ def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">, def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>; def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>; + +def int_arm_ldaex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>; +def int_arm_stlex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>; + def int_arm_clrex : Intrinsic<[]>; def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>; def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>; +def int_arm_stlexd : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>; +def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>; + //===----------------------------------------------------------------------===// // Data barrier instructions def int_arm_dmb : GCCBuiltin<"__builtin_arm_dmb">, Intrinsic<[], [llvm_i32_ty]>; diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 56bc7123ce53..f8841a2b058d 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3022,13 +3022,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { default: break; + case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { - SDValue MemAddr = N->getOperand(2); SDLoc dl(N); SDValue Chain = N->getOperand(0); - + SDValue MemAddr = N->getOperand(2); bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); - unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD; + + bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; + unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) + : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); // arm_ldrexd returns a i64 value in {i32, i32} std::vector ResTys; @@ -3080,7 +3083,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ReplaceUses(SDValue(N, 2), OutChain); return NULL; } - + case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { SDLoc dl(N); SDValue Chain = N->getOperand(0); @@ -3106,7 +3109,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; + bool IsRelease = IntNo == Intrinsic::arm_stlexd; + unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) + : (IsRelease ? ARM::STLEXD : ARM::STREXD); SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index b512db5551e2..42bc5bc78601 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -11150,6 +11150,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11162,6 +11163,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11174,6 +11176,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; @@ -11185,6 +11188,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index d4976594d4ff..cc748e19c851 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -4574,6 +4574,33 @@ def strex_4 : PatFrag<(ops node:$val, node:$ptr), return cast(N)->getMemoryVT() == MVT::i32; }]>; +def ldaex_1 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldaex_2 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldaex_4 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def stlex_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def stlex_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def stlex_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + let mayLoad = 1 in { def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "ldrexb", "\t$Rt, $addr", @@ -4591,11 +4618,14 @@ def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), } def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaexb", "\t$Rt, $addr", []>; + NoItinerary, "ldaexb", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>; def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaexh", "\t$Rt, $addr", []>; + NoItinerary, "ldaexh", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>; def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaex", "\t$Rt, $addr", []>; + NoItinerary, "ldaex", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>; let hasExtraDefRegAllocReq = 1 in def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), NoItinerary, "ldaexd", "\t$Rt, $addr", []> { @@ -4606,13 +4636,16 @@ def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexb", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>; + [(set GPR:$Rd, (strex_1 GPR:$Rt, + addr_offset_none:$addr))]>; def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexh", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_2 GPR:$Rt, addr_offset_none:$addr))]>; + [(set GPR:$Rd, (strex_2 GPR:$Rt, + addr_offset_none:$addr))]>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>; + [(set GPR:$Rd, (strex_4 GPR:$Rt, + addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), @@ -4621,13 +4654,16 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd), } def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", - []>; + [(set GPR:$Rd, + (stlex_1 GPR:$Rt, addr_offset_none:$addr))]>; def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", - []>; + [(set GPR:$Rd, + (stlex_2 GPR:$Rt, addr_offset_none:$addr))]>; def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "stlex", "\t$Rd, $Rt, $addr", - []>; + [(set GPR:$Rd, + (stlex_4 GPR:$Rt, addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STLEXD : AIstlex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), @@ -4651,6 +4687,15 @@ def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), (STREXH GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(and (ldaex_1 addr_offset_none:$addr), 0xff), + (LDAEXB addr_offset_none:$addr)>; +def : ARMPat<(and (ldaex_2 addr_offset_none:$addr), 0xffff), + (LDAEXH addr_offset_none:$addr)>; +def : ARMPat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STLEXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STLEXH GPR:$Rt, addr_offset_none:$addr)>; + class acquiring_load : PatFrag<(ops node:$ptr), (base node:$ptr), [{ AtomicOrdering Ordering = cast(N)->getOrdering(); diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index e0bed42051d0..387bd6063828 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3284,15 +3284,18 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexb", "\t$Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexh", "\t$Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaex", "\t$Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]> { + [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]> { bits<4> Rt; bits<4> addr; let Inst{31-27} = 0b11101; @@ -3320,21 +3323,21 @@ def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexb", "\t$Rd, $Rt, $addr", "", - [(set rGPR:$Rd, (strex_1 rGPR:$Rt, - addr_offset_none:$addr))]>; + [(set rGPR:$Rd, + (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>; def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", - [(set rGPR:$Rd, (strex_2 rGPR:$Rt, - addr_offset_none:$addr))]>; + [(set rGPR:$Rd, + (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", - [(set rGPR:$Rd, (strex_4 rGPR:$Rt, - t2addrmode_imm0_1020s4:$addr))]> { + [(set rGPR:$Rd, + (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3358,19 +3361,25 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rd, + (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rd, + (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "stlex", "\t$Rd, $Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]> { + [(set rGPR:$Rd, + (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]> { bits<4> Rd; bits<4> Rt; bits<4> addr; @@ -3412,6 +3421,15 @@ def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), (t2STREXH GPR:$Rt, addr_offset_none:$addr)>; +def : T2Pat<(and (ldaex_1 addr_offset_none:$addr), 0xff), + (t2LDAEXB addr_offset_none:$addr)>; +def : T2Pat<(and (ldaex_2 addr_offset_none:$addr), 0xffff), + (t2LDAEXH addr_offset_none:$addr)>; +def : T2Pat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (t2STLEXB GPR:$Rt, addr_offset_none:$addr)>; +def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (t2STLEXH GPR:$Rt, addr_offset_none:$addr)>; + //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return diff --git a/llvm/test/CodeGen/ARM/ldaex-stlex.ll b/llvm/test/CodeGen/ARM/ldaex-stlex.ll new file mode 100644 index 000000000000..f309f77fd5fe --- /dev/null +++ b/llvm/test/CodeGen/ARM/ldaex-stlex.ll @@ -0,0 +1,88 @@ +; RUN: llc < %s -mtriple=armv8-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-apple-darwin | FileCheck %s + +%0 = type { i32, i32 } + +; CHECK-LABEL: f0: +; CHECK: ldaexd +define i64 @f0(i8* %p) nounwind readonly { +entry: + %ldaexd = tail call %0 @llvm.arm.ldaexd(i8* %p) + %0 = extractvalue %0 %ldaexd, 1 + %1 = extractvalue %0 %ldaexd, 0 + %2 = zext i32 %0 to i64 + %3 = zext i32 %1 to i64 + %shl = shl nuw i64 %2, 32 + %4 = or i64 %shl, %3 + ret i64 %4 +} + +; CHECK-LABEL: f1: +; CHECK: stlexd +define i32 @f1(i8* %ptr, i64 %val) nounwind { +entry: + %tmp4 = trunc i64 %val to i32 + %tmp6 = lshr i64 %val, 32 + %tmp7 = trunc i64 %tmp6 to i32 + %stlexd = tail call i32 @llvm.arm.stlexd(i32 %tmp4, i32 %tmp7, i8* %ptr) + ret i32 %stlexd +} + +declare %0 @llvm.arm.ldaexd(i8*) nounwind readonly +declare i32 @llvm.arm.stlexd(i32, i32, i8*) nounwind + +; CHECK-LABEL: test_load_i8: +; CHECK: ldaexb r0, [r0] +; CHECK-NOT: uxtb +define i32 @test_load_i8(i8* %addr) { + %val = call i32 @llvm.arm.ldaex.p0i8(i8* %addr) + ret i32 %val +} + +; CHECK-LABEL: test_load_i16: +; CHECK: ldaexh r0, [r0] +; CHECK-NOT: uxth +define i32 @test_load_i16(i16* %addr) { + %val = call i32 @llvm.arm.ldaex.p0i16(i16* %addr) + ret i32 %val +} + +; CHECK-LABEL: test_load_i32: +; CHECK: ldaex r0, [r0] +define i32 @test_load_i32(i32* %addr) { + %val = call i32 @llvm.arm.ldaex.p0i32(i32* %addr) + ret i32 %val +} + +declare i32 @llvm.arm.ldaex.p0i8(i8*) nounwind readonly +declare i32 @llvm.arm.ldaex.p0i16(i16*) nounwind readonly +declare i32 @llvm.arm.ldaex.p0i32(i32*) nounwind readonly + +; CHECK-LABEL: test_store_i8: +; CHECK-NOT: uxtb +; CHECK: stlexb r0, r1, [r2] +define i32 @test_store_i8(i32, i8 %val, i8* %addr) { + %extval = zext i8 %val to i32 + %res = call i32 @llvm.arm.stlex.p0i8(i32 %extval, i8* %addr) + ret i32 %res +} + +; CHECK-LABEL: test_store_i16: +; CHECK-NOT: uxth +; CHECK: stlexh r0, r1, [r2] +define i32 @test_store_i16(i32, i16 %val, i16* %addr) { + %extval = zext i16 %val to i32 + %res = call i32 @llvm.arm.stlex.p0i16(i32 %extval, i16* %addr) + ret i32 %res +} + +; CHECK-LABEL: test_store_i32: +; CHECK: stlex r0, r1, [r2] +define i32 @test_store_i32(i32, i32 %val, i32* %addr) { + %res = call i32 @llvm.arm.stlex.p0i32(i32 %val, i32* %addr) + ret i32 %res +} + +declare i32 @llvm.arm.stlex.p0i8(i32, i8*) nounwind +declare i32 @llvm.arm.stlex.p0i16(i32, i16*) nounwind +declare i32 @llvm.arm.stlex.p0i32(i32, i32*) nounwind