From 7a2a0f80de2b1a0994b85e23cb5d01c384135474 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 8 Jul 2011 00:04:56 +0000 Subject: [PATCH] Go ahead and emit the barrier on x86-64 even without sse2. The processor supports it just fine. Fixes PR9675 and rdar://9740801 llvm-svn: 134664 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++--- llvm/test/CodeGen/X86/membarrier.ll | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/membarrier.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 01cea3b0a784..0dfc3167600a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9067,10 +9067,11 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ DebugLoc dl = Op.getDebugLoc(); - if (!Subtarget->hasSSE2()) { + // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. + // There isn't any reason to disable it if the target processor supports it. + if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, - Subtarget->is64Bit() ? MVT::i64 : MVT::i32); + SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ops[] = { DAG.getRegister(X86::ESP, MVT::i32), // Base DAG.getTargetConstant(1, MVT::i8), // Scale diff --git a/llvm/test/CodeGen/X86/membarrier.ll b/llvm/test/CodeGen/X86/membarrier.ll new file mode 100644 index 000000000000..42f8ef5ff047 --- /dev/null +++ b/llvm/test/CodeGen/X86/membarrier.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 -mattr=-sse -O0 +; PR9675 + +define i32 @t() { +entry: + %i = alloca i32, align 4 + store i32 1, i32* %i, align 4 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) + %0 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %i, i32 1) + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) + ret i32 0 +} + +declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind