diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index f29d127c85d1..9d42ac2e470c 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1933,7 +1933,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); if (Result == 0) return false; - MI->getParent()->insert(MI, Result); + FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/X86/fast-isel-gep.ll b/llvm/test/CodeGen/X86/fast-isel-gep.ll index 577dd7223a4d..622a1ff831d0 100644 --- a/llvm/test/CodeGen/X86/fast-isel-gep.ll +++ b/llvm/test/CodeGen/X86/fast-isel-gep.ll @@ -70,3 +70,20 @@ entry: ; X64: test4: ; X64: 128(%r{{.*}},%r{{.*}},8) } + +; PR8961 - Make sure the sext for the GEP addressing comes before the load that +; is folded. +define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind { + %v8 = getelementptr i8* %A, i32 %I + %v9 = bitcast i8* %v8 to i64* + %v10 = load i64* %v9 + %v11 = add i64 %B, %v10 + ret i64 %v11 +; X64: test5: +; X64: movslq %esi, %rax +; X64-NEXT: movq (%rdi,%rax), %rax +; X64-NEXT: addq %rdx, %rax +; X64-NEXT: ret +} + +