; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+m -verify-machineinstrs < %s | FileCheck %s
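; Check the frame layout when a variable-sized alloca (VLA) coexists with
; scalable vector objects: the scalable allocas are addressed relative to the
; frame pointer s0, while the VLA is allocated by moving sp.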
define void @rvv_vla(i64 %n, i64 %i) nounwind {
; CHECK-LABEL: rvv_vla:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 32
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a3, a2, 1
; CHECK-NEXT:    add a2, a3, a2
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    addi a0, a0, 15
; CHECK-NEXT:    andi a0, a0, -16
; CHECK-NEXT:    sub a0, sp, a0
; CHECK-NEXT:    mv sp, a0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    sub a2, s0, a2
; CHECK-NEXT:    addi a2, a2, -32
; CHECK-NEXT:    vl1re64.v v25, (a2)
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a3, a2, 1
; CHECK-NEXT:    add a2, a3, a2
; CHECK-NEXT:    sub a2, s0, a2
; CHECK-NEXT:    addi a2, a2, -32
; CHECK-NEXT:    vl2re64.v v26, (a2)
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    lw a0, 0(a0)
; CHECK-NEXT:    addi sp, s0, -32
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
  %vla.addr = alloca i32, i64 %n

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr

  %p = getelementptr i32, i32* %vla.addr, i64 %i
  %s = load volatile i32, i32* %p
  ret void
}
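; Check the frame layout when an overaligned (align 64) object coexists with
; scalable vector objects: sp is realigned with andi, and both the overaligned
; slot and the scalable allocas are addressed from the realigned sp.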
define void @rvv_overaligned() nounwind {
; CHECK-LABEL: rvv_overaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -128
; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 128
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a1, a0, 1
; CHECK-NEXT:    add a0, a1, a0
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 112
; CHECK-NEXT:    vl1re64.v v25, (a0)
; CHECK-NEXT:    addi a0, sp, 112
; CHECK-NEXT:    vl2re64.v v26, (a0)
; CHECK-NEXT:    lw a0, 64(sp)
; CHECK-NEXT:    addi sp, s0, -128
; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 128
; CHECK-NEXT:    ret
  %overaligned = alloca i32, align 64

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr

  %s = load volatile i32, i32* %overaligned, align 64
  ret void
}
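; Check the frame layout when a VLA, an overaligned object, and scalable vector
; objects are all present: sp is realigned and saved in s1 as a base pointer;
; the overaligned slot and scalable allocas are addressed from s1, the VLA from sp.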
define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind {
; CHECK-LABEL: rvv_vla_and_overaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -128
; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 128
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a3, a2, 1
; CHECK-NEXT:    add a2, a3, a2
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    mv s1, sp
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    addi a0, a0, 15
; CHECK-NEXT:    andi a0, a0, -16
; CHECK-NEXT:    sub a0, sp, a0
; CHECK-NEXT:    mv sp, a0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 1
; CHECK-NEXT:    add a2, s1, a2
; CHECK-NEXT:    addi a2, a2, 104
; CHECK-NEXT:    vl1re64.v v25, (a2)
; CHECK-NEXT:    addi a2, s1, 104
; CHECK-NEXT:    vl2re64.v v26, (a2)
; CHECK-NEXT:    lw a2, 64(s1)
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    lw a0, 0(a0)
; CHECK-NEXT:    addi sp, s0, -128
; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 128
; CHECK-NEXT:    ret
  %overaligned = alloca i32, align 64
  %vla.addr = alloca i32, i64 %n

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr

  %s1 = load volatile i32, i32* %overaligned, align 64
  %p = getelementptr i32, i32* %vla.addr, i64 %i
  %s2 = load volatile i32, i32* %p
  ret void
}