[RISCV] Enable isTruncateFree in SDAG for i64->i32 on rv64.
We have a good selection of W instructions, so promoting a truncated value back to i64 is often free. This appears to be a net code size reduction on SPECINT2006.

This has been split from D130397 as one of the patches needed to complete that.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D131819
commit 7a73ab5818 (parent ef8c34e954)
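As a rough illustration of the reasoning in the commit message (not part of the patch): RV64's W instructions operate on the low 32 bits and sign-extend their result, so a value truncated from i64 to i32 usually needs no instruction of its own, and promoting it back to i64 later is often free as well. A minimal C++ sketch of the kind of source pattern this helps:

// Illustrative only: a 64-bit add whose result is immediately truncated to
// 32 bits. Targeting rv64, this can typically be selected as a single addw,
// since addw already leaves the sign-extended low 32 bits of the sum in a
// 64-bit register, so neither the truncation nor a later re-extension needs
// a separate instruction.
#include <cstdint>

uint32_t trunc_add(uint64_t x, uint64_t y) {
  return static_cast<uint32_t>(x + y); // i64 add, then trunc to i32
}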
@@ -1104,6 +1104,8 @@ bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.
+// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
+// isTruncateFree?
 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
     return false;
@@ -1113,8 +1115,10 @@ bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
 }
 
 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
-  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
-      !SrcVT.isInteger() || !DstVT.isInteger())
+  // We consider i64->i32 free on RV64 since we have good selection of W
+  // instructions that make promoting operations back to i64 free in many cases.
+  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
+      !DstVT.isInteger())
     return false;
   unsigned SrcBits = SrcVT.getSizeInBits();
   unsigned DestBits = DstVT.getSizeInBits();
@@ -1725,17 +1725,10 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs, <
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV64ZVE32F:       # %bb.0:
-; RV64ZVE32F-NEXT:    addi sp, sp, -16
-; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT:    sw a1, 12(sp)
-; RV64ZVE32F-NEXT:    sw a0, 8(sp)
-; RV64ZVE32F-NEXT:    addi a0, sp, 12
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; RV64ZVE32F-NEXT:    vle32.v v9, (a0)
-; RV64ZVE32F-NEXT:    addi a0, sp, 8
-; RV64ZVE32F-NEXT:    vle32.v v8, (a0)
-; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
-; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
+; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
 ; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
 ; RV64ZVE32F-NEXT:    andi a1, a0, 1
@@ -1744,7 +1737,6 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs, <
 ; RV64ZVE32F-NEXT:    andi a0, a0, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB24_4
 ; RV64ZVE32F-NEXT:  .LBB24_2: # %else2
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
 ; RV64ZVE32F-NEXT:  .LBB24_3: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
@@ -1755,7 +1747,6 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs, <
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT:    vse32.v v8, (a3)
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i32>
   call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %tval, <2 x i32*> %ptrs, i32 4, <2 x i1> %m)
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+; Make sure we use lwu for the load, and don't emit
+; a sext.w for the compare. This requires isTruncateFree
+; to return true for i64->i32. Otherwise we emit a
+; lw and a shift pair for the zext.
+
+define void @foo(i32* %p, i64* %q, i32* %r) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lwu a0, 0(a0)
+; CHECK-NEXT:    sd a0, 0(a1)
+; CHECK-NEXT:    beqz a0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:  .LBB0_2: # %end
+; CHECK-NEXT:    ret
+  %a = load i32, i32* %p
+  %b = zext i32 %a to i64
+  store i64 %b, i64* %q
+  %c = icmp ne i32 %a, 0
+  br i1 %c, label %if, label %end
+
+if:
+  store i32 %a, i32* %r
+  br label %end
+
+end:
+  ret void
+}
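For reference, a rough C++ equivalent of the new test above (an illustration, not part of the patch; the function and parameter names simply mirror the IR): the zero-extending i64 store wants an lwu, and once i64->i32 truncation is treated as free the same loaded value can feed the 32-bit compare and conditional store without a sext.w or an lw plus shift pair.

#include <cstdint>

void foo(uint32_t *p, uint64_t *q, uint32_t *r) {
  uint32_t a = *p; // load i32 (expected to become lwu)
  *q = a;          // zext i32 -> i64, store i64
  if (a != 0)      // compare the original i32 value
    *r = a;        // store i32 on the taken path
}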