From 94ba37f8e3ef42e3c76afab9a222763b01663007 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 22 Feb 2008 09:25:47 +0000 Subject: [PATCH] Allow re-materialization of pic load (controlled by -remat-pic-load for now). llvm-svn: 47476 --- llvm/lib/Target/X86/X86InstrInfo.cpp | 26 ++++++++++++-- llvm/test/CodeGen/X86/pic-load-remat.ll | 45 +++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pic-load-remat.ll diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index cfbce7e14d4d..082b896b37b3 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -37,6 +37,10 @@ namespace { cl::desc("Print instructions that the allocator wants to" " fuse, but the X86 backend currently can't"), cl::Hidden); + cl::opt + ReMatPICLoad("remat-pic-load", + cl::desc("Allow rematerializing pic load"), + cl::init(false), cl::Hidden); } X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) @@ -735,10 +739,26 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(MachineInstr *MI) const { // Loads from constant pools are trivially rematerializable. if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && MI->getOperand(3).isReg() && MI->getOperand(4).isCPI() && - MI->getOperand(1).getReg() == 0 && MI->getOperand(2).getImm() == 1 && - MI->getOperand(3).getReg() == 0) - return true; + MI->getOperand(3).getReg() == 0) { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0) + return true; + if (!ReMatPICLoad) + return false; + // Allow re-materialization of PIC load. + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + bool isPICBase = false; + for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), + E = MRI.def_end(); I != E; ++I) { + MachineInstr *DefMI = I.getOperand().getParent(); + if (DefMI->getOpcode() != X86::MOVPC32r) + return false; + assert(!isPICBase && "More than one PIC base?"); + isPICBase = true; + } + return isPICBase; + } // If this is a load from a fixed argument slot, we know the value is // invariant across the whole function, because we don't redefine argument diff --git a/llvm/test/CodeGen/X86/pic-load-remat.ll b/llvm/test/CodeGen/X86/pic-load-remat.ll new file mode 100644 index 000000000000..674fb01ec6ff --- /dev/null +++ b/llvm/test/CodeGen/X86/pic-load-remat.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic -remat-pic-load | grep padd | grep pb + +define void @f() nounwind { +entry: + %tmp4403 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=2] + %tmp4443 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4609 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4651 = add <8 x i16> %tmp4609, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 > ; <<8 x i16>> [#uses=1] + %tmp4658 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4651, <8 x i16> bitcast (<4 x i32> < i32 1, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4669 = tail call <8 x i16> @llvm.x86.sse2.pmaxs.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> %tmp4443 ) nounwind readnone ; <<8 x i16>> [#uses=2] + %tmp4679 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4669, <8 x i16> %tmp4669 ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4689 = add <8 x i16> %tmp4679, %tmp4658 ; <<8 x i16>> [#uses=1] + %tmp4700 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4689, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4708 = bitcast <8 x i16> %tmp4700 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4772 = add <8 x i16> zeroinitializer, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 > ; <<8 x i16>> [#uses=1] + %tmp4779 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4772, <8 x i16> bitcast (<4 x i32> < i32 1, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4800 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4810 = add <8 x i16> %tmp4800, %tmp4779 ; <<8 x i16>> [#uses=1] + %tmp4821 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4810, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4829 = bitcast <8 x i16> %tmp4821 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4900 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 1, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4911 = tail call <8 x i16> @llvm.x86.sse2.pmaxs.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=2] + %tmp4921 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4911, <8 x i16> %tmp4911 ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4931 = add <8 x i16> %tmp4921, %tmp4900 ; <<8 x i16>> [#uses=1] + %tmp4942 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4931, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4950 = bitcast <8 x i16> %tmp4942 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4957 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4958 = bitcast <8 x i16> %tmp4957 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4967 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4968 = bitcast <8 x i16> %tmp4967 to <2 x i64> ; <<2 x i64>> [#uses=1] + store <2 x i64> %tmp4829, <2 x i64>* null, align 16 + store <2 x i64> %tmp4958, <2 x i64>* null, align 16 + store <2 x i64> %tmp4968, <2 x i64>* null, align 16 + store <2 x i64> %tmp4950, <2 x i64>* null, align 16 + store <2 x i64> %tmp4708, <2 x i64>* null, align 16 + unreachable +} + +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone