llvm-project/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

90 lines
3.9 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -mattr=+d -O0 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O0 %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -mattr=+d -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x double> %c, i64 %gvl) nounwind
; SPILL-O0-LABEL: foo:
; SPILL-O0: # %bb.0:
; SPILL-O0-NEXT: addi sp, sp, -32
; SPILL-O0-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: slli a1, a1, 1
; SPILL-O0-NEXT: sub sp, sp, a1
; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: add a1, sp, a1
[RISCV] Fix offset computation for RVV In D97111 we changed the RVV frame layout when using sp or bp to address the stack slots so we could address the emergency stack slot. The idea is to put the RVV objects as far as possible (in offset terms) from the frame reference register (sp / fp / bp). When using fp this happens naturally because the RVV objects are already the top of the stack and due to the constraints of RVV (VLENB being a power of two >= 128) the stack remains aligned. The rest of this summary does not apply to this case. When using sp / bp we need to skip the non-RVV stack slots. The size of the the non-RVV objects is computed subtracting the callee saved register size (whose computation is added in D97111 itself) to the total size of the stack (which does not account for RVV stack slots). However, when doing so we round to 16 bytes when computing that size and we end emitting a smaller offset that may belong to a scalar stack slot (see D98801). So this change removes that rounding. Also, because we want the RVV objects be between the non-RVV stack slots and the callee-saved register slots, we need to make sure the RVV objects are properly aligned to 8 bytes. Adding a padding of 8 would render the stack unaligned. So when allocating space for RVV (only when we don't use fp) we need to have extra padding that preserves the stack alignment. This way we can round to 8 bytes the offset that skips the non-RVV objects and we do not misalign the whole stack in the way. In some circumstances this means that the RVV objects may have padding before (=lower offsets from sp/bp) and after (before the CSR stack slots). Differential Revision: https://reviews.llvm.org/D98802
2021-03-18 18:26:33 +08:00
; SPILL-O0-NEXT: addi a1, a1, 24
; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; SPILL-O0-NEXT: vfadd.vv v25, v8, v9
[RISCV] Fix offset computation for RVV In D97111 we changed the RVV frame layout when using sp or bp to address the stack slots so we could address the emergency stack slot. The idea is to put the RVV objects as far as possible (in offset terms) from the frame reference register (sp / fp / bp). When using fp this happens naturally because the RVV objects are already the top of the stack and due to the constraints of RVV (VLENB being a power of two >= 128) the stack remains aligned. The rest of this summary does not apply to this case. When using sp / bp we need to skip the non-RVV stack slots. The size of the the non-RVV objects is computed subtracting the callee saved register size (whose computation is added in D97111 itself) to the total size of the stack (which does not account for RVV stack slots). However, when doing so we round to 16 bytes when computing that size and we end emitting a smaller offset that may belong to a scalar stack slot (see D98801). So this change removes that rounding. Also, because we want the RVV objects be between the non-RVV stack slots and the callee-saved register slots, we need to make sure the RVV objects are properly aligned to 8 bytes. Adding a padding of 8 would render the stack unaligned. So when allocating space for RVV (only when we don't use fp) we need to have extra padding that preserves the stack alignment. This way we can round to 8 bytes the offset that skips the non-RVV objects and we do not misalign the whole stack in the way. In some circumstances this means that the RVV objects may have padding before (=lower offsets from sp/bp) and after (before the CSR stack slots). Differential Revision: https://reviews.llvm.org/D98802
2021-03-18 18:26:33 +08:00
; SPILL-O0-NEXT: addi a0, sp, 24
; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: lui a0, %hi(.L.str)
; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str)
; SPILL-O0-NEXT: call puts@plt
[RISCV] Fix offset computation for RVV In D97111 we changed the RVV frame layout when using sp or bp to address the stack slots so we could address the emergency stack slot. The idea is to put the RVV objects as far as possible (in offset terms) from the frame reference register (sp / fp / bp). When using fp this happens naturally because the RVV objects are already the top of the stack and due to the constraints of RVV (VLENB being a power of two >= 128) the stack remains aligned. The rest of this summary does not apply to this case. When using sp / bp we need to skip the non-RVV stack slots. The size of the the non-RVV objects is computed subtracting the callee saved register size (whose computation is added in D97111 itself) to the total size of the stack (which does not account for RVV stack slots). However, when doing so we round to 16 bytes when computing that size and we end emitting a smaller offset that may belong to a scalar stack slot (see D98801). So this change removes that rounding. Also, because we want the RVV objects be between the non-RVV stack slots and the callee-saved register slots, we need to make sure the RVV objects are properly aligned to 8 bytes. Adding a padding of 8 would render the stack unaligned. So when allocating space for RVV (only when we don't use fp) we need to have extra padding that preserves the stack alignment. This way we can round to 8 bytes the offset that skips the non-RVV objects and we do not misalign the whole stack in the way. In some circumstances this means that the RVV objects may have padding before (=lower offsets from sp/bp) and after (before the CSR stack slots). Differential Revision: https://reviews.llvm.org/D98802
2021-03-18 18:26:33 +08:00
; SPILL-O0-NEXT: addi a1, sp, 24
; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: add a1, sp, a1
[RISCV] Fix offset computation for RVV In D97111 we changed the RVV frame layout when using sp or bp to address the stack slots so we could address the emergency stack slot. The idea is to put the RVV objects as far as possible (in offset terms) from the frame reference register (sp / fp / bp). When using fp this happens naturally because the RVV objects are already the top of the stack and due to the constraints of RVV (VLENB being a power of two >= 128) the stack remains aligned. The rest of this summary does not apply to this case. When using sp / bp we need to skip the non-RVV stack slots. The size of the the non-RVV objects is computed subtracting the callee saved register size (whose computation is added in D97111 itself) to the total size of the stack (which does not account for RVV stack slots). However, when doing so we round to 16 bytes when computing that size and we end emitting a smaller offset that may belong to a scalar stack slot (see D98801). So this change removes that rounding. Also, because we want the RVV objects be between the non-RVV stack slots and the callee-saved register slots, we need to make sure the RVV objects are properly aligned to 8 bytes. Adding a padding of 8 would render the stack unaligned. So when allocating space for RVV (only when we don't use fp) we need to have extra padding that preserves the stack alignment. This way we can round to 8 bytes the offset that skips the non-RVV objects and we do not misalign the whole stack in the way. In some circumstances this means that the RVV objects may have padding before (=lower offsets from sp/bp) and after (before the CSR stack slots). Differential Revision: https://reviews.llvm.org/D98802
2021-03-18 18:26:33 +08:00
; SPILL-O0-NEXT: addi a1, a1, 24
; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
[RISCV] Fix offset computation for RVV In D97111 we changed the RVV frame layout when using sp or bp to address the stack slots so we could address the emergency stack slot. The idea is to put the RVV objects as far as possible (in offset terms) from the frame reference register (sp / fp / bp). When using fp this happens naturally because the RVV objects are already the top of the stack and due to the constraints of RVV (VLENB being a power of two >= 128) the stack remains aligned. The rest of this summary does not apply to this case. When using sp / bp we need to skip the non-RVV stack slots. The size of the the non-RVV objects is computed subtracting the callee saved register size (whose computation is added in D97111 itself) to the total size of the stack (which does not account for RVV stack slots). However, when doing so we round to 16 bytes when computing that size and we end emitting a smaller offset that may belong to a scalar stack slot (see D98801). So this change removes that rounding. Also, because we want the RVV objects be between the non-RVV stack slots and the callee-saved register slots, we need to make sure the RVV objects are properly aligned to 8 bytes. Adding a padding of 8 would render the stack unaligned. So when allocating space for RVV (only when we don't use fp) we need to have extra padding that preserves the stack alignment. This way we can round to 8 bytes the offset that skips the non-RVV objects and we do not misalign the whole stack in the way. In some circumstances this means that the RVV objects may have padding before (=lower offsets from sp/bp) and after (before the CSR stack slots). Differential Revision: https://reviews.llvm.org/D98802
2021-03-18 18:26:33 +08:00
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; SPILL-O0-NEXT: vfadd.vv v8, v8, v25
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
; SPILL-O0-NEXT: add sp, sp, a0
; SPILL-O0-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; SPILL-O0-NEXT: addi sp, sp, 32
; SPILL-O0-NEXT: ret
;
; SPILL-O2-LABEL: foo:
; SPILL-O2: # %bb.0:
; SPILL-O2-NEXT: addi sp, sp, -32
; SPILL-O2-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; SPILL-O2-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 1
; SPILL-O2-NEXT: sub sp, sp, a1
; SPILL-O2-NEXT: mv s0, a0
; SPILL-O2-NEXT: addi a1, sp, 16
; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; SPILL-O2-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; SPILL-O2-NEXT: vfadd.vv v25, v8, v9
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: add a0, sp, a0
; SPILL-O2-NEXT: addi a0, a0, 16
; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: lui a0, %hi(.L.str)
; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str)
; SPILL-O2-NEXT: call puts@plt
; SPILL-O2-NEXT: vsetvli zero, s0, e64, m1, ta, mu
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: add a0, sp, a0
; SPILL-O2-NEXT: addi a0, a0, 16
; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: vfadd.vv v8, v26, v25
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; SPILL-O2-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; SPILL-O2-NEXT: addi sp, sp, 32
; SPILL-O2-NEXT: ret
{
%x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %gvl)
%call = call signext i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
%z = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %x, i64 %gvl)
ret <vscale x 1 x double> %z
}
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %gvl)
declare i32 @puts(i8*);