[RISCV] Improve i32 UADDSAT/USUBSAT on RV64.

The default promotion uses zero extends that become shifts. We
can use sign extend instead, which is better for RISCV.

I've used two different implementations based on whether we
have minu/maxu instructions.

Differential Revision: https://reviews.llvm.org/D98683
This commit is contained in:
Craig Topper 2021-03-16 00:29:42 -07:00
parent 1cb15b10ea
commit a33ce06cf5
5 changed files with 52 additions and 64 deletions

View File

@ -207,6 +207,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
}
if (!Subtarget.hasStdExtM()) {
@ -3521,6 +3523,29 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Overflow);
return;
}
case ISD::UADDSAT:
case ISD::USUBSAT: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
SDLoc DL(N);
if (Subtarget.hasStdExtZbb()) {
// With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
// sign extend allows overflow of the lower 32 bits to be detected on
// the promoted size.
SDValue LHS =
DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
// Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
// promotion for UADDO/USUBO.
Results.push_back(expandAddSubSat(N, DAG));
return;
}
case ISD::BITCAST: {
assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
Subtarget.hasStdExtF()) ||

View File

@ -24,19 +24,13 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
;
; RV64I-LABEL: func:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: addi a1, zero, 1
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: bltu a0, a1, .LBB0_2
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: addw a1, a0, a1
; RV64I-NEXT: addi a0, zero, -1
; RV64I-NEXT: bltu a1, a2, .LBB0_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func:
@ -48,16 +42,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
;
; RV64IZbb-LABEL: func:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: slli a1, a1, 32
; RV64IZbb-NEXT: srli a1, a1, 32
; RV64IZbb-NEXT: slli a0, a0, 32
; RV64IZbb-NEXT: srli a0, a0, 32
; RV64IZbb-NEXT: add a0, a0, a1
; RV64IZbb-NEXT: addi a1, zero, 1
; RV64IZbb-NEXT: slli a1, a1, 32
; RV64IZbb-NEXT: addi a1, a1, -1
; RV64IZbb-NEXT: minu a0, a0, a1
; RV64IZbb-NEXT: sext.w a0, a0
; RV64IZbb-NEXT: not a2, a1
; RV64IZbb-NEXT: minu a0, a0, a2
; RV64IZbb-NEXT: addw a0, a0, a1
; RV64IZbb-NEXT: ret
%tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y);
ret i32 %tmp;

View File

@ -25,16 +25,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
;
; RV64I-LABEL: func32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: addi a1, zero, 1
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: bltu a0, a1, .LBB0_2
; RV64I-NEXT: addw a1, a0, a1
; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: addi a0, zero, -1
; RV64I-NEXT: bltu a1, a2, .LBB0_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB0_2:
@ -50,16 +45,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
;
; RV64IZbb-LABEL: func32:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: slli a0, a0, 32
; RV64IZbb-NEXT: srli a0, a0, 32
; RV64IZbb-NEXT: mul a1, a1, a2
; RV64IZbb-NEXT: slli a1, a1, 32
; RV64IZbb-NEXT: srli a1, a1, 32
; RV64IZbb-NEXT: mulw a1, a1, a2
; RV64IZbb-NEXT: not a2, a1
; RV64IZbb-NEXT: sext.w a0, a0
; RV64IZbb-NEXT: minu a0, a0, a2
; RV64IZbb-NEXT: add a0, a0, a1
; RV64IZbb-NEXT: addi a1, zero, 1
; RV64IZbb-NEXT: slli a1, a1, 32
; RV64IZbb-NEXT: addi a1, a1, -1
; RV64IZbb-NEXT: minu a0, a0, a1
; RV64IZbb-NEXT: ret
%a = mul i32 %y, %z
%tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %a)

View File

@ -24,17 +24,13 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
;
; RV64I-LABEL: func:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a2, a0, 32
; RV64I-NEXT: sub a0, a2, a1
; RV64I-NEXT: mv a1, zero
; RV64I-NEXT: bltu a2, a0, .LBB0_2
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: subw a1, a0, a1
; RV64I-NEXT: mv a0, zero
; RV64I-NEXT: bltu a2, a1, .LBB0_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: sext.w a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func:
@ -45,11 +41,7 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
;
; RV64IZbb-LABEL: func:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: slli a2, a1, 32
; RV64IZbb-NEXT: srli a2, a2, 32
; RV64IZbb-NEXT: slli a0, a0, 32
; RV64IZbb-NEXT: srli a0, a0, 32
; RV64IZbb-NEXT: maxu a0, a0, a2
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: subw a0, a0, a1
; RV64IZbb-NEXT: ret
%tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %y);

View File

@ -25,14 +25,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
;
; RV64I-LABEL: func32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a3, a0, 32
; RV64I-NEXT: mul a0, a1, a2
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: sub a1, a3, a0
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: subw a1, a0, a1
; RV64I-NEXT: sext.w a2, a0
; RV64I-NEXT: mv a0, zero
; RV64I-NEXT: bltu a3, a1, .LBB0_2
; RV64I-NEXT: bltu a2, a1, .LBB0_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB0_2:
@ -47,11 +44,8 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
;
; RV64IZbb-LABEL: func32:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: slli a0, a0, 32
; RV64IZbb-NEXT: srli a0, a0, 32
; RV64IZbb-NEXT: mul a1, a1, a2
; RV64IZbb-NEXT: slli a1, a1, 32
; RV64IZbb-NEXT: srli a1, a1, 32
; RV64IZbb-NEXT: mulw a1, a1, a2
; RV64IZbb-NEXT: sext.w a0, a0
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
; RV64IZbb-NEXT: ret