From 02b614abc89f2270eb391c8c56247b5271bf10b7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 10 Dec 2018 06:07:50 +0000 Subject: [PATCH] [X86] Merge addcarryx/addcarry intrinsic into a single addcarry intrinsic. Both intrinsics do the exact same thing so we really only need one. Earlier in the 8.0 cycle we changed the signature of this intrinsic without renaming it. But it looks difficult to get the autoupgrade code to allow me to merge the intrinsics and change the signature at the same time. So I've renamed the intrinsic slightly for the new merged intrinsic. I'm skipping autoupgrading from the previous new to 8.0 signature. I've also renamed the subborrow for consistency. llvm-svn: 348737 --- llvm/include/llvm/IR/IntrinsicsX86.td | 14 +-- llvm/lib/IR/AutoUpgrade.cpp | 99 ++++++++----------- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 10 +- llvm/test/CodeGen/X86/adx-commute.mir | 12 +-- llvm/test/CodeGen/X86/adx-intrinsics.ll | 84 +++++----------- .../CodeGen/X86/stack-folding-adx-x86_64.ll | 59 +++++++---- llvm/test/CodeGen/X86/stack-folding-adx.mir | 12 +-- 7 files changed, 125 insertions(+), 165 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 53a1e214ae96..a59dbe7ea4a4 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2726,22 +2726,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // ADX let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_addcarryx_u32: + def int_x86_addcarry_32: Intrinsic<[llvm_i8_ty, llvm_i32_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_addcarryx_u64: + def int_x86_addcarry_64: Intrinsic<[llvm_i8_ty, llvm_i64_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_addcarry_u32: + def int_x86_subborrow_32: Intrinsic<[llvm_i8_ty, llvm_i32_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_addcarry_u64: - Intrinsic<[llvm_i8_ty, llvm_i64_ty], - [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_subborrow_u32: - Intrinsic<[llvm_i8_ty, llvm_i32_ty], - [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_subborrow_u64: + def int_x86_subborrow_64: Intrinsic<[llvm_i8_ty, llvm_i64_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 83fc4787cf9a..39e29a2a093c 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -65,24 +65,19 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, return true; } -static bool UpgradeADCSBBIntrinsic(Function *F, Intrinsic::ID IID, - Function *&NewFn) { - // If this intrinsic has 3 operands, it's the new version. - if (F->getFunctionType()->getNumParams() == 3) - return false; - - rename(F); - NewFn = Intrinsic::getDeclaration(F->getParent(), IID); - return true; -} - static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { // All of the intrinsics matches below should be marked with which llvm // version started autoupgrading them. At some point in the future we would // like to use this information to remove upgrade code for some older // intrinsics. It is currently undecided how we will determine that future // point. - if (Name.startswith("sse2.paddus.") || // Added in 8.0 + if (Name == "addcarryx.u32" || // Added in 8.0 + Name == "addcarryx.u64" || // Added in 8.0 + Name == "addcarry.u32" || // Added in 8.0 + Name == "addcarry.u64" || // Added in 8.0 + Name == "subborrow.u32" || // Added in 8.0 + Name == "subborrow.u64" || // Added in 8.0 + Name.startswith("sse2.paddus.") || // Added in 8.0 Name.startswith("sse2.psubus.") || // Added in 8.0 Name.startswith("avx2.paddus.") || // Added in 8.0 Name.startswith("avx2.psubus.") || // Added in 8.0 @@ -382,19 +377,6 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, return true; } - if (Name == "addcarryx.u32") // Added in 8.0 - return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u32, NewFn); - if (Name == "addcarryx.u64") // Added in 8.0 - return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u64, NewFn); - if (Name == "addcarry.u32") // Added in 8.0 - return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u32, NewFn); - if (Name == "addcarry.u64") // Added in 8.0 - return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u64, NewFn); - if (Name == "subborrow.u32") // Added in 8.0 - return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u32, NewFn); - if (Name == "subborrow.u64") // Added in 8.0 - return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u64, NewFn); - if (Name == "rdtscp") { // Added in 8.0 // If this intrinsic has 0 operands, it's the new version. if (F->getFunctionType()->getNumParams() == 0) @@ -3277,6 +3259,39 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) : CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); + } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" || + Name == "addcarry.u32" || Name == "addcarry.u64" || + Name == "subborrow.u32" || Name == "subborrow.u64")) { + Intrinsic::ID IID; + if (Name[0] == 'a' && Name.back() == '2') + IID = Intrinsic::x86_addcarry_32; + else if (Name[0] == 'a' && Name.back() == '4') + IID = Intrinsic::x86_addcarry_64; + else if (Name[0] == 's' && Name.back() == '2') + IID = Intrinsic::x86_subborrow_32; + else if (Name[0] == 's' && Name.back() == '4') + IID = Intrinsic::x86_subborrow_64; + else + llvm_unreachable("Unexpected intrinsic"); + + // Make a call with 3 operands. + Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2)}; + Value *NewCall = Builder.CreateCall( + Intrinsic::getDeclaration(CI->getModule(), IID), + Args); + + // Extract the second result and store it. + Value *Data = Builder.CreateExtractValue(NewCall, 1); + // Cast the pointer to the right type. + Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3), + llvm::PointerType::getUnqual(Data->getType())); + Builder.CreateAlignedStore(Data, Ptr, 1); + // Replace the original call result with the first result of the new call. + Value *CF = Builder.CreateExtractValue(NewCall, 0); + + CI->replaceAllUsesWith(CF); + Rep = nullptr; } else if (IsX86 && Name.startswith("avx512.mask.") && upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { // Rep will be updated by the call in the condition. @@ -3478,40 +3493,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { return; } - case Intrinsic::x86_addcarryx_u32: - case Intrinsic::x86_addcarryx_u64: - case Intrinsic::x86_addcarry_u32: - case Intrinsic::x86_addcarry_u64: - case Intrinsic::x86_subborrow_u32: - case Intrinsic::x86_subborrow_u64: { - // This used to take 4 arguments. If we only have 3 arguments its already - // upgraded. - if (CI->getNumOperands() == 3) - return; - - // Make a call with 3 operands. - NewCall = Builder.CreateCall(NewFn, { CI->getArgOperand(0), - CI->getArgOperand(1), - CI->getArgOperand(2)}); - // Extract the second result and store it. - Value *Data = Builder.CreateExtractValue(NewCall, 1); - // Cast the pointer to the right type. - Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3), - llvm::PointerType::getUnqual(Data->getType())); - Builder.CreateAlignedStore(Data, Ptr, 1); - // Replace the original call result with the first result of the new call. - Value *CF = Builder.CreateExtractValue(NewCall, 0); - - std::string Name = CI->getName(); - if (!Name.empty()) { - CI->setName(Name + ".old"); - NewCall->setName(Name); - } - CI->replaceAllUsesWith(CF); - CI->eraseFromParent(); - return; - } - case Intrinsic::x86_sse41_insertps: case Intrinsic::x86_sse41_dppd: case Intrinsic::x86_sse41_dpps: diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index e3e296136bd8..f1ed8ca48cdb 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -286,10 +286,8 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) { * the alphabetical order. */ static const IntrinsicData IntrinsicsWithoutChain[] = { - X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, X86ISD::ADD), - X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, X86ISD::ADD), - X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, X86ISD::ADD), - X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, X86ISD::ADD), + X86_INTRINSIC_DATA(addcarry_32, ADX, X86ISD::ADC, X86ISD::ADD), + X86_INTRINSIC_DATA(addcarry_64, ADX, X86ISD::ADC, X86ISD::ADD), X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0), @@ -1223,8 +1221,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0), X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), - X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, X86ISD::SUB), - X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, X86ISD::SUB), + X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB), + X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB), X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), diff --git a/llvm/test/CodeGen/X86/adx-commute.mir b/llvm/test/CodeGen/X86/adx-commute.mir index 11fb1e641849..1e204e339134 100644 --- a/llvm/test/CodeGen/X86/adx-commute.mir +++ b/llvm/test/CodeGen/X86/adx-commute.mir @@ -7,7 +7,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" define void @adcx32_commute(i8 %cf, i32 %a, i32 %b, i32* %res) #0 { - %ret = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %cf, i32 %a, i32 %b) + %ret = call { i8, i32 } @llvm.x86.addcarry.32(i8 %cf, i32 %a, i32 %b) %1 = extractvalue { i8, i32 } %ret, 1 %2 = mul i32 %a, %1 store i32 %2, i32* %res @@ -15,7 +15,7 @@ } define void @adcx64_commute(i8 %cf, i64 %a, i64 %b, i64* %res) #0 { - %ret = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 %cf, i64 %a, i64 %b) + %ret = call { i8, i64 } @llvm.x86.addcarry.64(i8 %cf, i64 %a, i64 %b) %1 = extractvalue { i8, i64 } %ret, 1 %2 = mul i64 %a, %1 store i64 %2, i64* %res @@ -23,7 +23,7 @@ } define void @adox32_commute(i8 %cf, i32 %a, i32 %b, i32* %res) #0 { - %ret = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %cf, i32 %a, i32 %b) + %ret = call { i8, i32 } @llvm.x86.addcarry.32(i8 %cf, i32 %a, i32 %b) %1 = extractvalue { i8, i32 } %ret, 1 %2 = mul i32 %a, %1 store i32 %2, i32* %res @@ -31,7 +31,7 @@ } define void @adox64_commute(i8 %cf, i64 %a, i64 %b, i64* %res) #0 { - %ret = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 %cf, i64 %a, i64 %b) + %ret = call { i8, i64 } @llvm.x86.addcarry.64(i8 %cf, i64 %a, i64 %b) %1 = extractvalue { i8, i64 } %ret, 1 %2 = mul i64 %a, %1 store i64 %2, i64* %res @@ -39,10 +39,10 @@ } ; Function Attrs: nounwind readnone - declare { i8, i32 } @llvm.x86.addcarryx.u32(i8, i32, i32) #1 + declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32) #1 ; Function Attrs: nounwind readnone - declare { i8, i64 } @llvm.x86.addcarryx.u64(i8, i64, i64) #1 + declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64) #1 ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #2 diff --git a/llvm/test/CodeGen/X86/adx-intrinsics.ll b/llvm/test/CodeGen/X86/adx-intrinsics.ll index d6b75e12bc15..d37be0653289 100644 --- a/llvm/test/CodeGen/X86/adx-intrinsics.ll +++ b/llvm/test/CodeGen/X86/adx-intrinsics.ll @@ -2,26 +2,8 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=NOADX ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=ADX -declare { i8, i32 } @llvm.x86.addcarryx.u32(i8, i32, i32) - -define i8 @test_addcarryx_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) { -; CHECK-LABEL: test_addcarryx_u32: -; CHECK: ## %bb.0: -; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] -; CHECK-NEXT: adcl %edx, %esi ## encoding: [0x11,0xd6] -; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] -; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31] -; CHECK-NEXT: retq ## encoding: [0xc3] - %ret = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %c, i32 %a, i32 %b) - %1 = extractvalue { i8, i32 } %ret, 1 - %2 = bitcast i8* %ptr to i32* - store i32 %1, i32* %2, align 1 - %3 = extractvalue { i8, i32 } %ret, 0 - ret i8 %3 -} - -define i8 @test_addcarryx_u32_load(i8 %c, i32* %aptr, i32 %b, i8* %ptr) { -; CHECK-LABEL: test_addcarryx_u32_load: +define i8 @test_addcarry_32_load(i8 %c, i32* %aptr, i32 %b, i8* %ptr) { +; CHECK-LABEL: test_addcarry_32_load: ; CHECK: ## %bb.0: ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] ; CHECK-NEXT: adcl (%rsi), %edx ## encoding: [0x13,0x16] @@ -29,7 +11,7 @@ define i8 @test_addcarryx_u32_load(i8 %c, i32* %aptr, i32 %b, i8* %ptr) { ; CHECK-NEXT: movl %edx, (%rcx) ## encoding: [0x89,0x11] ; CHECK-NEXT: retq ## encoding: [0xc3] %a = load i32, i32* %aptr - %ret = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %c, i32 %a, i32 %b) + %ret = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b) %1 = extractvalue { i8, i32 } %ret, 1 %2 = bitcast i8* %ptr to i32* store i32 %1, i32* %2, align 1 @@ -37,8 +19,8 @@ define i8 @test_addcarryx_u32_load(i8 %c, i32* %aptr, i32 %b, i8* %ptr) { ret i8 %3 } -define i8 @test_addcarryx_u32_load2(i8 %c, i32 %a, i32* %bptr, i8* %ptr) { -; CHECK-LABEL: test_addcarryx_u32_load2: +define i8 @test_addcarry_32_load2(i8 %c, i32 %a, i32* %bptr, i8* %ptr) { +; CHECK-LABEL: test_addcarry_32_load2: ; CHECK: ## %bb.0: ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] ; CHECK-NEXT: adcl (%rdx), %esi ## encoding: [0x13,0x32] @@ -46,7 +28,7 @@ define i8 @test_addcarryx_u32_load2(i8 %c, i32 %a, i32* %bptr, i8* %ptr) { ; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load i32, i32* %bptr - %ret = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %c, i32 %a, i32 %b) + %ret = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b) %1 = extractvalue { i8, i32 } %ret, 1 %2 = bitcast i8* %ptr to i32* store i32 %1, i32* %2, align 1 @@ -54,35 +36,17 @@ define i8 @test_addcarryx_u32_load2(i8 %c, i32 %a, i32* %bptr, i8* %ptr) { ret i8 %3 } -declare { i8, i64 } @llvm.x86.addcarryx.u64(i8, i64, i64) +declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32) -define i8 @test_addcarryx_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) { -; CHECK-LABEL: test_addcarryx_u64: -; CHECK: ## %bb.0: -; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] -; CHECK-NEXT: adcq %rdx, %rsi ## encoding: [0x48,0x11,0xd6] -; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] -; CHECK-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31] -; CHECK-NEXT: retq ## encoding: [0xc3] - %ret = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 %c, i64 %a, i64 %b) - %1 = extractvalue { i8, i64 } %ret, 1 - %2 = bitcast i8* %ptr to i64* - store i64 %1, i64* %2, align 1 - %3 = extractvalue { i8, i64 } %ret, 0 - ret i8 %3 -} - -declare { i8, i32 } @llvm.x86.addcarry.u32(i8, i32, i32) - -define i8 @test_addcarry_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) { -; CHECK-LABEL: test_addcarry_u32: +define i8 @test_addcarry_32(i8 %c, i32 %a, i32 %b, i8* %ptr) { +; CHECK-LABEL: test_addcarry_32: ; CHECK: ## %bb.0: ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] ; CHECK-NEXT: adcl %edx, %esi ## encoding: [0x11,0xd6] ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31] ; CHECK-NEXT: retq ## encoding: [0xc3] - %ret = call { i8, i32 } @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b) + %ret = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b) %1 = extractvalue { i8, i32 } %ret, 1 %2 = bitcast i8* %ptr to i32* store i32 %1, i32* %2, align 1 @@ -90,17 +54,17 @@ define i8 @test_addcarry_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) { ret i8 %3 } -declare { i8, i64 } @llvm.x86.addcarry.u64(i8, i64, i64) +declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64) -define i8 @test_addcarry_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) { -; CHECK-LABEL: test_addcarry_u64: +define i8 @test_addcarry_64(i8 %c, i64 %a, i64 %b, i8* %ptr) { +; CHECK-LABEL: test_addcarry_64: ; CHECK: ## %bb.0: ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] ; CHECK-NEXT: adcq %rdx, %rsi ## encoding: [0x48,0x11,0xd6] ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31] ; CHECK-NEXT: retq ## encoding: [0xc3] - %ret = call { i8, i64 } @llvm.x86.addcarry.u64(i8 %c, i64 %a, i64 %b) + %ret = call { i8, i64 } @llvm.x86.addcarry.64(i8 %c, i64 %a, i64 %b) %1 = extractvalue { i8, i64 } %ret, 1 %2 = bitcast i8* %ptr to i64* store i64 %1, i64* %2, align 1 @@ -108,17 +72,17 @@ define i8 @test_addcarry_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) { ret i8 %3 } -declare { i8, i32 } @llvm.x86.subborrow.u32(i8, i32, i32) +declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32) -define i8 @test_subborrow_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) { -; CHECK-LABEL: test_subborrow_u32: +define i8 @test_subborrow_32(i8 %c, i32 %a, i32 %b, i8* %ptr) { +; CHECK-LABEL: test_subborrow_32: ; CHECK: ## %bb.0: ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] ; CHECK-NEXT: sbbl %edx, %esi ## encoding: [0x19,0xd6] ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31] ; CHECK-NEXT: retq ## encoding: [0xc3] - %ret = call { i8, i32 } @llvm.x86.subborrow.u32(i8 %c, i32 %a, i32 %b) + %ret = call { i8, i32 } @llvm.x86.subborrow.32(i8 %c, i32 %a, i32 %b) %1 = extractvalue { i8, i32 } %ret, 1 %2 = bitcast i8* %ptr to i32* store i32 %1, i32* %2, align 1 @@ -126,17 +90,17 @@ define i8 @test_subborrow_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) { ret i8 %3 } -declare { i8, i64 } @llvm.x86.subborrow.u64(i8, i64, i64) +declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64) -define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) { -; CHECK-LABEL: test_subborrow_u64: +define i8 @test_subborrow_64(i8 %c, i64 %a, i64 %b, i8* %ptr) { +; CHECK-LABEL: test_subborrow_64: ; CHECK: ## %bb.0: ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff] ; CHECK-NEXT: sbbq %rdx, %rsi ## encoding: [0x48,0x19,0xd6] ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31] ; CHECK-NEXT: retq ## encoding: [0xc3] - %ret = call { i8, i64 } @llvm.x86.subborrow.u64(i8 %c, i64 %a, i64 %b) + %ret = call { i8, i64 } @llvm.x86.subborrow.64(i8 %c, i64 %a, i64 %b) %1 = extractvalue { i8, i64 } %ret, 1 %2 = bitcast i8* %ptr to i64* store i64 %1, i64* %2, align 1 @@ -157,7 +121,7 @@ define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i %1 = load i64, i64* %a, align 8 %2 = load i64, i64* %b, align 8 %3 = bitcast i64* %res to i8* - %4 = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 0, i64 %1, i64 %2) + %4 = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %1, i64 %2) %5 = extractvalue { i8, i64 } %4, 1 %6 = bitcast i8* %3 to i64* store i64 %5, i64* %6, align 1 @@ -175,7 +139,7 @@ define void @allzeros() { ; CHECK-NEXT: movq %rax, 0 ## encoding: [0x48,0x89,0x04,0x25,0x00,0x00,0x00,0x00] ; CHECK-NEXT: retq ## encoding: [0xc3] entry: - %0 = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 0, i64 0, i64 0) + %0 = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 0, i64 0) %1 = extractvalue { i8, i64 } %0, 1 store i64 %1, i64* null, align 1 %2 = extractvalue { i8, i64 } %0, 0 diff --git a/llvm/test/CodeGen/X86/stack-folding-adx-x86_64.ll b/llvm/test/CodeGen/X86/stack-folding-adx-x86_64.ll index 22d9c39125cc..fa00b7cd7fcc 100644 --- a/llvm/test/CodeGen/X86/stack-folding-adx-x86_64.ll +++ b/llvm/test/CodeGen/X86/stack-folding-adx-x86_64.ll @@ -13,52 +13,75 @@ define i8 @stack_fold_addcarry_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { ;CHECK-LABEL: stack_fold_addcarry_u32 ;CHECK: adcl {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 4-byte Folded Reload %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = tail call i8 @llvm.x86.addcarry.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) - ret i8 %2; + %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2) + %3 = extractvalue { i8, i32 } %2, 1 + %4 = bitcast i8* %a3 to i32* + store i32 %3, i32* %4, align 1 + %5 = extractvalue { i8, i32 } %2, 0 + ret i8 %5 } -declare i8 @llvm.x86.addcarry.u32(i8, i32, i32, i8*) define i8 @stack_fold_addcarry_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { ;CHECK-LABEL: stack_fold_addcarry_u64 ;CHECK: adcq {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 8-byte Folded Reload %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = tail call i8 @llvm.x86.addcarry.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) - ret i8 %2; + %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2) + %3 = extractvalue { i8, i64 } %2, 1 + %4 = bitcast i8* %a3 to i64* + store i64 %3, i64* %4, align 1 + %5 = extractvalue { i8, i64 } %2, 0 + ret i8 %5 } -declare i8 @llvm.x86.addcarry.u64(i8, i64, i64, i8*) define i8 @stack_fold_addcarryx_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { ;CHECK-LABEL: stack_fold_addcarryx_u32 ;CHECK: adcl {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 4-byte Folded Reload %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = tail call i8 @llvm.x86.addcarryx.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) - ret i8 %2; + %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2) + %3 = extractvalue { i8, i32 } %2, 1 + %4 = bitcast i8* %a3 to i32* + store i32 %3, i32* %4, align 1 + %5 = extractvalue { i8, i32 } %2, 0 + ret i8 %5 } -declare i8 @llvm.x86.addcarryx.u32(i8, i32, i32, i8*) define i8 @stack_fold_addcarryx_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { ;CHECK-LABEL: stack_fold_addcarryx_u64 ;CHECK: adcq {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 8-byte Folded Reload %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = tail call i8 @llvm.x86.addcarryx.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) - ret i8 %2; + %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2) + %3 = extractvalue { i8, i64 } %2, 1 + %4 = bitcast i8* %a3 to i64* + store i64 %3, i64* %4, align 1 + %5 = extractvalue { i8, i64 } %2, 0 + ret i8 %5 } -declare i8 @llvm.x86.addcarryx.u64(i8, i64, i64, i8*) define i8 @stack_fold_subborrow_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { ;CHECK-LABEL: stack_fold_subborrow_u32 ;CHECK: sbbl {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 4-byte Folded Reload %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = tail call i8 @llvm.x86.subborrow.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) - ret i8 %2; + %2 = call { i8, i32 } @llvm.x86.subborrow.32(i8 %a0, i32 %a1, i32 %a2) + %3 = extractvalue { i8, i32 } %2, 1 + %4 = bitcast i8* %a3 to i32* + store i32 %3, i32* %4, align 1 + %5 = extractvalue { i8, i32 } %2, 0 + ret i8 %5 } -declare i8 @llvm.x86.subborrow.u32(i8, i32, i32, i8*) define i8 @stack_fold_subborrow_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { ;CHECK-LABEL: stack_fold_subborrow_u64 ;CHECK: sbbq {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 8-byte Folded Reload %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = tail call i8 @llvm.x86.subborrow.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) - ret i8 %2; + %2 = call { i8, i64 } @llvm.x86.subborrow.64(i8 %a0, i64 %a1, i64 %a2) + %3 = extractvalue { i8, i64 } %2, 1 + %4 = bitcast i8* %a3 to i64* + store i64 %3, i64* %4, align 1 + %5 = extractvalue { i8, i64 } %2, 0 + ret i8 %5 } -declare i8 @llvm.x86.subborrow.u64(i8, i64, i64, i8*) + +declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32) +declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64) +declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32) +declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64) diff --git a/llvm/test/CodeGen/X86/stack-folding-adx.mir b/llvm/test/CodeGen/X86/stack-folding-adx.mir index b51d48095c20..6e977821853d 100644 --- a/llvm/test/CodeGen/X86/stack-folding-adx.mir +++ b/llvm/test/CodeGen/X86/stack-folding-adx.mir @@ -5,7 +5,7 @@ ; Function Attrs: nounwind define i8 @stack_fold_adcx32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) #0 { %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %a0, i32 %a1, i32 %a2) + %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2) %3 = extractvalue { i8, i32 } %2, 1 %4 = bitcast i8* %a3 to i32* store i32 %3, i32* %4, align 1 @@ -16,7 +16,7 @@ ; Function Attrs: nounwind define i8 @stack_fold_adcx64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) #0 { %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 %a0, i64 %a1, i64 %a2) + %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2) %3 = extractvalue { i8, i64 } %2, 1 %4 = bitcast i8* %a3 to i64* store i64 %3, i64* %4, align 1 @@ -26,7 +26,7 @@ define i8 @stack_fold_adox32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) #0 { %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %a0, i32 %a1, i32 %a2) + %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2) %3 = extractvalue { i8, i32 } %2, 1 %4 = bitcast i8* %a3 to i32* store i32 %3, i32* %4, align 1 @@ -37,7 +37,7 @@ ; Function Attrs: nounwind define i8 @stack_fold_adox64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) #0 { %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() - %2 = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 %a0, i64 %a1, i64 %a2) + %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2) %3 = extractvalue { i8, i64 } %2, 1 %4 = bitcast i8* %a3 to i64* store i64 %3, i64* %4, align 1 @@ -46,10 +46,10 @@ } ; Function Attrs: nounwind readnone - declare { i8, i32 } @llvm.x86.addcarryx.u32(i8, i32, i32) #1 + declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32) #1 ; Function Attrs: nounwind readnone - declare { i8, i64 } @llvm.x86.addcarryx.u64(i8, i64, i64) #1 + declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64) #1 ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #2