From 8c49ab040c67420c5fe4e2cbfe869beb20095c7a Mon Sep 17 00:00:00 2001 From: Dmitry Vassiliev Date: Fri, 29 Apr 2022 15:32:22 -0700 Subject: [PATCH] [NVPTX] Add add.cc/addc.cc/sub.cc/subc.cc for i64 PTX supports those instructions for i64 starting from 4.3. The patch also marks corresponding DAG nodes legal for both i32 and i64. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D124698 --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 11 +++++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 46 +++++++++++++-------- llvm/test/CodeGen/NVPTX/add-128bit.ll | 20 --------- llvm/test/CodeGen/NVPTX/add-sub-128bit.ll | 36 ++++++++++++++++ 4 files changed, 76 insertions(+), 37 deletions(-) delete mode 100644 llvm/test/CodeGen/NVPTX/add-128bit.ll create mode 100644 llvm/test/CodeGen/NVPTX/add-sub-128bit.ll diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6b2f574ab93e..3c357a440a32 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -487,6 +487,17 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::CTLZ, Ty, Legal); } + setOperationAction(ISD::ADDC, MVT::i32, Legal); + setOperationAction(ISD::ADDE, MVT::i32, Legal); + setOperationAction(ISD::SUBC, MVT::i32, Legal); + setOperationAction(ISD::SUBE, MVT::i32, Legal); + if (STI.getPTXVersion() >= 43) { + setOperationAction(ISD::ADDC, MVT::i64, Legal); + setOperationAction(ISD::ADDE, MVT::i64, Legal); + setOperationAction(ISD::SUBC, MVT::i64, Legal); + setOperationAction(ISD::SUBE, MVT::i64, Legal); + } + setOperationAction(ISD::CTTZ, MVT::i16, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Expand); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 5ca01c8aafbb..6f9c40feb10e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ 
b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -146,6 +146,7 @@ def True : Predicate<"true">; def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">; +def hasPTX43 : Predicate<"Subtarget->getPTXVersion() >= 43">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">; @@ -204,17 +205,29 @@ multiclass I3 { [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; } -// Template for instructions which take 3 int32 args. The instructions are +// Template for instructions which take 3 int args. The instructions are // named "<OpcStr>.s32" (e.g. "addc.cc.s32"). -multiclass ADD_SUB_INT_32 { - def i32rr : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; +multiclass ADD_SUB_INT_CARRY { + let hasSideEffects = 1 in { + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>, + Requires<[hasPTX43]>; + def i64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>, + 
Requires<[hasPTX43]>; + } } // Template for instructions which take three fp64 or fp32 args. The @@ -584,14 +597,13 @@ defm SUB_i1 : ADD_SUB_i1; defm ADD : I3<"add.s", add>; defm SUB : I3<"sub.s", sub>; -// int32 addition and subtraction with carry-out. -// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?). -defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; -defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; +// int32 and int64 addition and subtraction with carry-out. +defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>; +defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>; -// int32 addition and subtraction with carry-in and carry-out. -defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; -defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; +// int32 and int64 addition and subtraction with carry-in and carry-out. +defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>; +defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>; defm MULT : I3<"mul.lo.s", mul>; diff --git a/llvm/test/CodeGen/NVPTX/add-128bit.ll b/llvm/test/CodeGen/NVPTX/add-128bit.ll deleted file mode 100644 index 12283fc52002..000000000000 --- a/llvm/test/CodeGen/NVPTX/add-128bit.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - - - -define void @foo(i64 %a, i64 %add, i128* %retptr) { -; CHECK: add.s64 -; CHECK: setp.lt.u64 -; CHECK: setp.lt.u64 -; CHECK: selp.u64 -; CHECK: selp.b64 -; CHECK: add.s64 - %t1 = sext i64 %a to i128 - %add2 = zext i64 %add to i128 - %val = add i128 %t1, %add2 - store i128 %val, i128* %retptr - ret void -} diff --git a/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll b/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll new file mode 100644 index 000000000000..f096bad8042d --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll @@ -0,0 +1,36 
@@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=COMMON,NOCARRY +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes=COMMON,CARRY +; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %} + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +; COMMON-LABEL: test_add +define i128 @test_add(i128 %a, i128 %b) { +; NOCARRY: add.s64 +; NOCARRY-NEXT: setp.lt.u64 +; NOCARRY-NEXT: setp.lt.u64 +; NOCARRY-NEXT: selp.u64 +; NOCARRY-NEXT: selp.b64 +; NOCARRY-NEXT: add.s64 + +; CARRY: add.cc.s64 +; CARRY-NEXT: addc.cc.s64 + + %1 = add i128 %a, %b + ret i128 %1 +} + +; COMMON-LABEL: test_sub +define i128 @test_sub(i128 %a, i128 %b) { +; NOCARRY: sub.s64 +; NOCARRY-NEXT: setp.lt.u64 +; NOCARRY-NEXT: selp.s64 +; NOCARRY-NEXT: add.s64 +; NOCARRY-NEXT: sub.s64 + +; CARRY: sub.cc.s64 +; CARRY-NEXT: subc.cc.s64 + + %1 = sub i128 %a, %b + ret i128 %1 +}