diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index ca7dfcdbb9ca..7382cd07be82 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -648,35 +648,24 @@ void X86ExpandPseudo::ExpandVastartSaveXmmRegs( EntryBlk->end()); TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk); - int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm(); - Register BaseReg; - uint64_t FrameOffset = - X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed(); - uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm(); + uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm(); + uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm(); // TODO: add support for YMM and ZMM here. unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. - for (int64_t OpndIdx = 3, RegIdx = 0; + for (int64_t OpndIdx = 7, RegIdx = 0; OpndIdx < VAStartPseudoInstr->getNumOperands() - 1; OpndIdx++, RegIdx++) { - - int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16; - - MachineMemOperand *MMO = Func->getMachineMemOperand( - MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset), - MachineMemOperand::MOStore, - /*Size=*/16, Align(16)); - - BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc)) - .addReg(BaseReg) - .addImm(/*Scale=*/1) - .addReg(/*IndexReg=*/0) - .addImm(/*Disp=*/Offset) - .addReg(/*Segment=*/0) - .addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg()) - .addMemOperand(MMO); + auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc)); + for (int i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16); + else + NewMI.add(VAStartPseudoInstr->getOperand(i + 1)); + } + NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg()); assert(Register::isPhysicalRegister( VAStartPseudoInstr->getOperand(OpndIdx).getReg())); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f401b54da014..0627e6cb6105 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3742,13 +3742,19 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); SaveXMMOps.push_back(ALVal); - SaveXMMOps.push_back( - DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32)); + SaveXMMOps.push_back(RSFIN); SaveXMMOps.push_back( DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32)); llvm::append_range(SaveXMMOps, LiveXMMRegs); - MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL, - MVT::Other, SaveXMMOps)); + MachineMemOperand *StoreMMO = + DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(), + Offset), + MachineMemOperand::MOStore, 128, Align(16)); + MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS, + DL, DAG.getVTList(MVT::Other), + SaveXMMOps, MVT::i8, StoreMMO)); } if (!MemOps.empty()) diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f6f1b4356446..6805cb75f0f2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -650,10 +650,6 @@ namespace llvm { // packed single precision. DPBF16PS, - // Save xmm argument registers to the stack, according to %al. An operator - // is needed so that this can be expanded with control flow. - VASTART_SAVE_XMM_REGS, - // A stack checking function call. On Windows it's _chkstk call. DYN_ALLOCA, @@ -871,6 +867,10 @@ namespace llvm { AESENCWIDE256KL, AESDECWIDE256KL, + // Save xmm argument registers to the stack, according to %al. An operator + // is needed so that this can be expanded with control flow. + VASTART_SAVE_XMM_REGS, + // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all // opcodes will be thought as target memory ops! diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index bb878660231e..ba52283b570d 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -69,16 +69,12 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), let SchedRW = [WriteSystem] in { // x86-64 va_start lowering magic. -let hasSideEffects = 1, Defs = [EFLAGS] in { +let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), - (ins GR8:$al, - i32imm:$regsavefi, i32imm:$offset, - variable_ops), - "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", - [(X86vastart_save_xmm_regs GR8:$al, - timm:$regsavefi, - timm:$offset), + (ins GR8:$al, i8mem:$regsavefi, variable_ops), + "#VASTART_SAVE_XMM_REGS $al, $regsavefi", + [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi), (implicit EFLAGS)]>; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 6a619aff0a45..fee9939b8dfc 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -91,8 +91,7 @@ def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, - SDTCisVT<1, iPTR>, - SDTCisVT<2, iPTR>]>; + SDTCisPtrTy<1>]>; def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, @@ -184,7 +183,7 @@ def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret, def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, - [SDNPHasChain, SDNPVariadic]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>; def X86vaarg64 : SDNode<"X86ISD::VAARG_64", SDT_X86VAARG, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, diff --git a/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll b/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll index 952a9e2d8b4e..ccd8cdff891a 100644 --- a/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll +++ b/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll @@ -5,6 +5,7 @@ define void @reduce(i32, i32, i32, i32, i32, i32, ...) nounwind { ; CHECK-LABEL: reduce: ; CHECK: # %bb.0: +; CHECK-NEXT: subq $56, %rsp ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: # %bb.3: @@ -21,15 +22,14 @@ define void @reduce(i32, i32, i32, i32, i32, i32, ...) nounwind { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: subq $56, %rsp ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, 16 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, 8 ; CHECK-NEXT: movl $48, 4 ; CHECK-NEXT: movl $48, 0 -; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: retq br i1 undef, label %8, label %7