[NVPTX] Add add.cc/addc.cc/sub.cc/subc.cc for i64

PTX supports these instructions on 64-bit operands starting with PTX ISA version 4.3.
The patch also marks the corresponding DAG nodes (ADDC/ADDE/SUBC/SUBE) Legal for i32, and for i64 when the PTX version is 4.3 or newer.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D124698
This commit is contained in:
Dmitry Vassiliev 2022-04-29 15:32:22 -07:00
parent 938ed8ae99
commit 8c49ab040c
4 changed files with 76 additions and 37 deletions

View File

@ -487,6 +487,17 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::CTLZ, Ty, Legal); setOperationAction(ISD::CTLZ, Ty, Legal);
} }
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
setOperationAction(ISD::SUBC, MVT::i32, Legal);
setOperationAction(ISD::SUBE, MVT::i32, Legal);
if (STI.getPTXVersion() >= 43) {
setOperationAction(ISD::ADDC, MVT::i64, Legal);
setOperationAction(ISD::ADDE, MVT::i64, Legal);
setOperationAction(ISD::SUBC, MVT::i64, Legal);
setOperationAction(ISD::SUBE, MVT::i64, Legal);
}
setOperationAction(ISD::CTTZ, MVT::i16, Expand); setOperationAction(ISD::CTTZ, MVT::i16, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Expand);

View File

@ -146,6 +146,7 @@ def True : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">; def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">;
def hasPTX43 : Predicate<"Subtarget->getPTXVersion() >= 43">;
def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">; def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">;
@ -204,17 +205,29 @@ multiclass I3<string OpcStr, SDNode OpNode> {
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
} }
// Template for instructions which take 3 int32 args. The instructions are // Template for instructions which take 3 int args. The instructions are
// named "<OpcStr>.s32" or "<OpcStr>.s64" (e.g. "addc.cc.s32", "addc.cc.s64").
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> { multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
def i32rr : let hasSideEffects = 1 in {
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), def i32rr :
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
def i32ri : [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), def i32ri :
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
def i64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>,
Requires<[hasPTX43]>;
def i64ri :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>,
Requires<[hasPTX43]>;
}
} }
// Template for instructions which take three fp64 or fp32 args. The // Template for instructions which take three fp64 or fp32 args. The
@ -584,14 +597,13 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
defm ADD : I3<"add.s", add>; defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>; defm SUB : I3<"sub.s", sub>;
// int32 addition and subtraction with carry-out. // int32 and int64 addition and subtraction with carry-out.
// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?). defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
// int32 addition and subtraction with carry-in and carry-out. // int32 and int64 addition and subtraction with carry-in and carry-out.
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
defm MULT : I3<"mul.lo.s", mul>; defm MULT : I3<"mul.lo.s", mul>;

View File

@ -1,20 +0,0 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
define void @foo(i64 %a, i64 %add, i128* %retptr) {
; CHECK: add.s64
; CHECK: setp.lt.u64
; CHECK: setp.lt.u64
; CHECK: selp.u64
; CHECK: selp.b64
; CHECK: add.s64
%t1 = sext i64 %a to i128
%add2 = zext i64 %add to i128
%val = add i128 %t1, %add2
store i128 %val, i128* %retptr
ret void
}

View File

@ -0,0 +1,36 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=COMMON,NOCARRY
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes=COMMON,CARRY
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | %ptxas-verify %}

; Checks how 128-bit integer add/sub is lowered for NVPTX.
;   NOCARRY: default PTX version; the carry/borrow between the two 64-bit
;            halves is expected to be computed with setp/selp.
;   CARRY:   -mattr=+ptx43; the 64-bit add.cc/addc.cc and sub.cc/subc.cc
;            instructions are expected instead.
; Both configurations are also fed to ptxas (when available) so that the
; emitted carry instructions are checked by the real assembler.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

; COMMON-LABEL: test_add
define i128 @test_add(i128 %a, i128 %b) {
; NOCARRY: add.s64
; NOCARRY-NEXT: setp.lt.u64
; NOCARRY-NEXT: setp.lt.u64
; NOCARRY-NEXT: selp.u64
; NOCARRY-NEXT: selp.b64
; NOCARRY-NEXT: add.s64

; CARRY: add.cc.s64
; CARRY-NEXT: addc.cc.s64

  %1 = add i128 %a, %b
  ret i128 %1
}

; COMMON-LABEL: test_sub
define i128 @test_sub(i128 %a, i128 %b) {
; NOCARRY: sub.s64
; NOCARRY-NEXT: setp.lt.u64
; NOCARRY-NEXT: selp.s64
; NOCARRY-NEXT: add.s64
; NOCARRY-NEXT: sub.s64

; CARRY: sub.cc.s64
; CARRY-NEXT: subc.cc.s64

  %1 = sub i128 %a, %b
  ret i128 %1
}