forked from OSchip/llvm-project
[RISCV] Improve i32 UADDSAT/USUBSAT on RV64.
The default promotion uses zero extends that become shifts. We can use sign extend instead which is better for RISCV. I've used two different implementations based on whether we have minu/maxu instructions. Differential Revision: https://reviews.llvm.org/D98683
This commit is contained in:
parent
1cb15b10ea
commit
a33ce06cf5
|
@ -207,6 +207,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
||||||
|
|
||||||
setOperationAction(ISD::UADDO, MVT::i32, Custom);
|
setOperationAction(ISD::UADDO, MVT::i32, Custom);
|
||||||
setOperationAction(ISD::USUBO, MVT::i32, Custom);
|
setOperationAction(ISD::USUBO, MVT::i32, Custom);
|
||||||
|
setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
|
||||||
|
setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Subtarget.hasStdExtM()) {
|
if (!Subtarget.hasStdExtM()) {
|
||||||
|
@ -3521,6 +3523,29 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
|
||||||
Results.push_back(Overflow);
|
Results.push_back(Overflow);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
case ISD::UADDSAT:
|
||||||
|
case ISD::USUBSAT: {
|
||||||
|
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
|
||||||
|
"Unexpected custom legalisation");
|
||||||
|
SDLoc DL(N);
|
||||||
|
if (Subtarget.hasStdExtZbb()) {
|
||||||
|
// With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
|
||||||
|
// sign extend allows overflow of the lower 32 bits to be detected on
|
||||||
|
// the promoted size.
|
||||||
|
SDValue LHS =
|
||||||
|
DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
|
||||||
|
SDValue RHS =
|
||||||
|
DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
|
||||||
|
SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
|
||||||
|
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
|
||||||
|
// promotion for UADDO/USUBO.
|
||||||
|
Results.push_back(expandAddSubSat(N, DAG));
|
||||||
|
return;
|
||||||
|
}
|
||||||
case ISD::BITCAST: {
|
case ISD::BITCAST: {
|
||||||
assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
|
assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
|
||||||
Subtarget.hasStdExtF()) ||
|
Subtarget.hasStdExtF()) ||
|
||||||
|
|
|
@ -24,19 +24,13 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
;
|
;
|
||||||
; RV64I-LABEL: func:
|
; RV64I-LABEL: func:
|
||||||
; RV64I: # %bb.0:
|
; RV64I: # %bb.0:
|
||||||
; RV64I-NEXT: slli a1, a1, 32
|
; RV64I-NEXT: mv a2, a0
|
||||||
; RV64I-NEXT: srli a1, a1, 32
|
; RV64I-NEXT: addw a1, a0, a1
|
||||||
; RV64I-NEXT: slli a0, a0, 32
|
; RV64I-NEXT: addi a0, zero, -1
|
||||||
; RV64I-NEXT: srli a0, a0, 32
|
; RV64I-NEXT: bltu a1, a2, .LBB0_2
|
||||||
; RV64I-NEXT: add a0, a0, a1
|
|
||||||
; RV64I-NEXT: addi a1, zero, 1
|
|
||||||
; RV64I-NEXT: slli a1, a1, 32
|
|
||||||
; RV64I-NEXT: addi a1, a1, -1
|
|
||||||
; RV64I-NEXT: bltu a0, a1, .LBB0_2
|
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: mv a0, a1
|
; RV64I-NEXT: mv a0, a1
|
||||||
; RV64I-NEXT: .LBB0_2:
|
; RV64I-NEXT: .LBB0_2:
|
||||||
; RV64I-NEXT: sext.w a0, a0
|
|
||||||
; RV64I-NEXT: ret
|
; RV64I-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV32IZbb-LABEL: func:
|
; RV32IZbb-LABEL: func:
|
||||||
|
@ -48,16 +42,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
;
|
;
|
||||||
; RV64IZbb-LABEL: func:
|
; RV64IZbb-LABEL: func:
|
||||||
; RV64IZbb: # %bb.0:
|
; RV64IZbb: # %bb.0:
|
||||||
; RV64IZbb-NEXT: slli a1, a1, 32
|
; RV64IZbb-NEXT: not a2, a1
|
||||||
; RV64IZbb-NEXT: srli a1, a1, 32
|
; RV64IZbb-NEXT: minu a0, a0, a2
|
||||||
; RV64IZbb-NEXT: slli a0, a0, 32
|
; RV64IZbb-NEXT: addw a0, a0, a1
|
||||||
; RV64IZbb-NEXT: srli a0, a0, 32
|
|
||||||
; RV64IZbb-NEXT: add a0, a0, a1
|
|
||||||
; RV64IZbb-NEXT: addi a1, zero, 1
|
|
||||||
; RV64IZbb-NEXT: slli a1, a1, 32
|
|
||||||
; RV64IZbb-NEXT: addi a1, a1, -1
|
|
||||||
; RV64IZbb-NEXT: minu a0, a0, a1
|
|
||||||
; RV64IZbb-NEXT: sext.w a0, a0
|
|
||||||
; RV64IZbb-NEXT: ret
|
; RV64IZbb-NEXT: ret
|
||||||
%tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y);
|
%tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y);
|
||||||
ret i32 %tmp;
|
ret i32 %tmp;
|
||||||
|
|
|
@ -25,16 +25,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
;
|
;
|
||||||
; RV64I-LABEL: func32:
|
; RV64I-LABEL: func32:
|
||||||
; RV64I: # %bb.0:
|
; RV64I: # %bb.0:
|
||||||
; RV64I-NEXT: slli a0, a0, 32
|
|
||||||
; RV64I-NEXT: srli a0, a0, 32
|
|
||||||
; RV64I-NEXT: mul a1, a1, a2
|
; RV64I-NEXT: mul a1, a1, a2
|
||||||
; RV64I-NEXT: slli a1, a1, 32
|
; RV64I-NEXT: addw a1, a0, a1
|
||||||
; RV64I-NEXT: srli a1, a1, 32
|
; RV64I-NEXT: sext.w a2, a0
|
||||||
; RV64I-NEXT: add a0, a0, a1
|
; RV64I-NEXT: addi a0, zero, -1
|
||||||
; RV64I-NEXT: addi a1, zero, 1
|
; RV64I-NEXT: bltu a1, a2, .LBB0_2
|
||||||
; RV64I-NEXT: slli a1, a1, 32
|
|
||||||
; RV64I-NEXT: addi a1, a1, -1
|
|
||||||
; RV64I-NEXT: bltu a0, a1, .LBB0_2
|
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: mv a0, a1
|
; RV64I-NEXT: mv a0, a1
|
||||||
; RV64I-NEXT: .LBB0_2:
|
; RV64I-NEXT: .LBB0_2:
|
||||||
|
@ -50,16 +45,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
;
|
;
|
||||||
; RV64IZbb-LABEL: func32:
|
; RV64IZbb-LABEL: func32:
|
||||||
; RV64IZbb: # %bb.0:
|
; RV64IZbb: # %bb.0:
|
||||||
; RV64IZbb-NEXT: slli a0, a0, 32
|
; RV64IZbb-NEXT: mulw a1, a1, a2
|
||||||
; RV64IZbb-NEXT: srli a0, a0, 32
|
; RV64IZbb-NEXT: not a2, a1
|
||||||
; RV64IZbb-NEXT: mul a1, a1, a2
|
; RV64IZbb-NEXT: sext.w a0, a0
|
||||||
; RV64IZbb-NEXT: slli a1, a1, 32
|
; RV64IZbb-NEXT: minu a0, a0, a2
|
||||||
; RV64IZbb-NEXT: srli a1, a1, 32
|
|
||||||
; RV64IZbb-NEXT: add a0, a0, a1
|
; RV64IZbb-NEXT: add a0, a0, a1
|
||||||
; RV64IZbb-NEXT: addi a1, zero, 1
|
|
||||||
; RV64IZbb-NEXT: slli a1, a1, 32
|
|
||||||
; RV64IZbb-NEXT: addi a1, a1, -1
|
|
||||||
; RV64IZbb-NEXT: minu a0, a0, a1
|
|
||||||
; RV64IZbb-NEXT: ret
|
; RV64IZbb-NEXT: ret
|
||||||
%a = mul i32 %y, %z
|
%a = mul i32 %y, %z
|
||||||
%tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %a)
|
%tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %a)
|
||||||
|
|
|
@ -24,17 +24,13 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
;
|
;
|
||||||
; RV64I-LABEL: func:
|
; RV64I-LABEL: func:
|
||||||
; RV64I: # %bb.0:
|
; RV64I: # %bb.0:
|
||||||
; RV64I-NEXT: slli a1, a1, 32
|
; RV64I-NEXT: mv a2, a0
|
||||||
; RV64I-NEXT: srli a1, a1, 32
|
; RV64I-NEXT: subw a1, a0, a1
|
||||||
; RV64I-NEXT: slli a0, a0, 32
|
; RV64I-NEXT: mv a0, zero
|
||||||
; RV64I-NEXT: srli a2, a0, 32
|
; RV64I-NEXT: bltu a2, a1, .LBB0_2
|
||||||
; RV64I-NEXT: sub a0, a2, a1
|
|
||||||
; RV64I-NEXT: mv a1, zero
|
|
||||||
; RV64I-NEXT: bltu a2, a0, .LBB0_2
|
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: mv a1, a0
|
; RV64I-NEXT: mv a0, a1
|
||||||
; RV64I-NEXT: .LBB0_2:
|
; RV64I-NEXT: .LBB0_2:
|
||||||
; RV64I-NEXT: sext.w a0, a1
|
|
||||||
; RV64I-NEXT: ret
|
; RV64I-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV32IZbb-LABEL: func:
|
; RV32IZbb-LABEL: func:
|
||||||
|
@ -45,11 +41,7 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
;
|
;
|
||||||
; RV64IZbb-LABEL: func:
|
; RV64IZbb-LABEL: func:
|
||||||
; RV64IZbb: # %bb.0:
|
; RV64IZbb: # %bb.0:
|
||||||
; RV64IZbb-NEXT: slli a2, a1, 32
|
; RV64IZbb-NEXT: maxu a0, a0, a1
|
||||||
; RV64IZbb-NEXT: srli a2, a2, 32
|
|
||||||
; RV64IZbb-NEXT: slli a0, a0, 32
|
|
||||||
; RV64IZbb-NEXT: srli a0, a0, 32
|
|
||||||
; RV64IZbb-NEXT: maxu a0, a0, a2
|
|
||||||
; RV64IZbb-NEXT: subw a0, a0, a1
|
; RV64IZbb-NEXT: subw a0, a0, a1
|
||||||
; RV64IZbb-NEXT: ret
|
; RV64IZbb-NEXT: ret
|
||||||
%tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %y);
|
%tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %y);
|
||||||
|
|
|
@ -25,14 +25,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
;
|
;
|
||||||
; RV64I-LABEL: func32:
|
; RV64I-LABEL: func32:
|
||||||
; RV64I: # %bb.0:
|
; RV64I: # %bb.0:
|
||||||
; RV64I-NEXT: slli a0, a0, 32
|
; RV64I-NEXT: mul a1, a1, a2
|
||||||
; RV64I-NEXT: srli a3, a0, 32
|
; RV64I-NEXT: subw a1, a0, a1
|
||||||
; RV64I-NEXT: mul a0, a1, a2
|
; RV64I-NEXT: sext.w a2, a0
|
||||||
; RV64I-NEXT: slli a0, a0, 32
|
|
||||||
; RV64I-NEXT: srli a0, a0, 32
|
|
||||||
; RV64I-NEXT: sub a1, a3, a0
|
|
||||||
; RV64I-NEXT: mv a0, zero
|
; RV64I-NEXT: mv a0, zero
|
||||||
; RV64I-NEXT: bltu a3, a1, .LBB0_2
|
; RV64I-NEXT: bltu a2, a1, .LBB0_2
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: mv a0, a1
|
; RV64I-NEXT: mv a0, a1
|
||||||
; RV64I-NEXT: .LBB0_2:
|
; RV64I-NEXT: .LBB0_2:
|
||||||
|
@ -47,11 +44,8 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
;
|
;
|
||||||
; RV64IZbb-LABEL: func32:
|
; RV64IZbb-LABEL: func32:
|
||||||
; RV64IZbb: # %bb.0:
|
; RV64IZbb: # %bb.0:
|
||||||
; RV64IZbb-NEXT: slli a0, a0, 32
|
; RV64IZbb-NEXT: mulw a1, a1, a2
|
||||||
; RV64IZbb-NEXT: srli a0, a0, 32
|
; RV64IZbb-NEXT: sext.w a0, a0
|
||||||
; RV64IZbb-NEXT: mul a1, a1, a2
|
|
||||||
; RV64IZbb-NEXT: slli a1, a1, 32
|
|
||||||
; RV64IZbb-NEXT: srli a1, a1, 32
|
|
||||||
; RV64IZbb-NEXT: maxu a0, a0, a1
|
; RV64IZbb-NEXT: maxu a0, a0, a1
|
||||||
; RV64IZbb-NEXT: sub a0, a0, a1
|
; RV64IZbb-NEXT: sub a0, a0, a1
|
||||||
; RV64IZbb-NEXT: ret
|
; RV64IZbb-NEXT: ret
|
||||||
|
|
Loading…
Reference in New Issue