[AArch64][SVE] More unpredicated ld1/st1 patterns for reg+reg addressing modes
In some cases, we can improve the generated code by using a load with the "wrong" element width: in particular, using ld1b/st1b when we see reg+reg without a shift.

Differential Revision: https://reviews.llvm.org/D100527
commit 8a40bf6d21 (parent 2d5d720df0)
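To illustrate the intent before reading the diff: SVE reg+reg loads scale the index register by the element size (ld1h uses [Xn, Xm, lsl #1]), so a plain byte offset cannot be folded into a wider-element load and previously forced a separate add to form the address. A minimal sketch of the effect, assuming little-endian and a byte-typed base pointer; the "before" sequence is inferred from the change, not copied from this diff:

  ; IR: load a <vscale x 8 x i16> vector from a byte-indexed address
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
  %val = load <vscale x 8 x i16>, <vscale x 8 x i16>* %ptrcast

  ; before (assumed):              ; after this patch:
  ;   add   x8, x0, x1             ;   ptrue p0.b
  ;   ptrue p0.h                   ;   ld1b { z0.b }, p0/z, [x0, x1]
  ;   ld1h { z0.h }, p0/z, [x8]    ;   ret
  ;   ret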
@@ -1975,6 +1975,25 @@ let Predicates = [HasSVE] in {
  defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
  defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;

  // Allow using the reg+reg form of ld1b/st1b for memory accesses with the
  // same width as nxv16i8. This saves an add in cases where we would
  // otherwise compute the address separately.
  multiclass unpred_loadstore_bitcast<ValueType Ty> {
    let Predicates = [IsLE] in {
      def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
                (LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
      def : Pat<(store (Ty ZPR:$val), (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
                (ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
    }
  }
  defm : unpred_loadstore_bitcast<nxv8i16>;
  defm : unpred_loadstore_bitcast<nxv8f16>;
  defm : unpred_loadstore_bitcast<nxv8bf16>;
  defm : unpred_loadstore_bitcast<nxv4f32>;
  defm : unpred_loadstore_bitcast<nxv4i32>;
  defm : unpred_loadstore_bitcast<nxv2i64>;
  defm : unpred_loadstore_bitcast<nxv2f64>;

  multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
    def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
                  (Store PPR:$val, GPR64sp:$base, simm9:$offset)>;

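The patterns above are wrapped in [IsLE] because reinterpreting an ld1b/st1b access as a wider element type only preserves the value on little-endian targets; on big-endian, byte-sized and element-sized SVE accesses leave different byte orders within each lane. As a rough illustration of the kind of IR the new defm instantiations cover (the function name is hypothetical, mirroring the tests added later in this diff):

  define <vscale x 4 x float> @ld1_regreg_bitcast_to_f32(i8* %addr, i64 %off) {
    ; expected on little-endian: ptrue p0.b ; ld1b { z0.b }, p0/z, [x0, x1]
    %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
    %ptrcast = bitcast i8* %ptr to <vscale x 4 x float>*
    %val = load <vscale x 4 x float>, <vscale x 4 x float>* %ptrcast
    ret <vscale x 4 x float> %val
  }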
@@ -652,11 +652,12 @@ define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #16 // =16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -671,11 +672,12 @@ define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #2 // =2
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -714,11 +716,12 @@ define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #16 // =16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -733,11 +736,12 @@ define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #4 // =4
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -776,11 +780,12 @@ define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #16 // =16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -795,11 +800,12 @@ define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #8 // =8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -838,11 +844,12 @@ define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #16 // =16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -857,11 +864,12 @@ define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #2 // =2
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -900,11 +908,12 @@ define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #16 // =16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -919,11 +928,12 @@ define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #4 // =4
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -962,11 +972,12 @@ define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #16 // =16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -981,11 +992,12 @@ define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #8 // =8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -1027,11 +1039,12 @@ define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1>
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #8 // =8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: addvl sp, sp, #2

@@ -1051,11 +1064,12 @@ define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1>
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #4 // =4
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: addvl sp, sp, #2

@@ -1075,11 +1089,12 @@ define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1>
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #2 // =2
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: addvl sp, sp, #2

@@ -1121,11 +1136,12 @@ define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8>
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: sub x8, x8, #16 // =16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

@@ -1141,13 +1157,15 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov x9, #-32
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl]
; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: sub x8, x8, #32 // =32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload

@@ -15,6 +15,42 @@ define <vscale x 16 x i8> @ld1_nxv16i8(i8* %addr, i64 %off) {
  ret <vscale x 16 x i8> %val
}

define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
  %val = load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %ptrcast
  ret <vscale x 8 x i16> %val
}

define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 4 x i32>*
  %val = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %ptrcast
  ret <vscale x 4 x i32> %val
}

define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 2 x i64>*
  %val = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %ptrcast
  ret <vscale x 2 x i64> %val
}

define <vscale x 8 x i16> @ld1_nxv8i16_zext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16_zext8:
; CHECK: // %bb.0:

@@ -15,6 +15,42 @@ define void @st1_nxv16i8(i8* %addr, i64 %off, <vscale x 16 x i8> %val) {
  ret void
}

define void @st1_nxv16i8_bitcast_from_i16(i8* %addr, i64 %off, <vscale x 8 x i16> %val) {
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
  store <vscale x 8 x i16> %val, <vscale x 8 x i16>* %ptrcast
  ret void
}

define void @st1_nxv16i8_bitcast_from_i32(i8* %addr, i64 %off, <vscale x 4 x i32> %val) {
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 4 x i32>*
  store <vscale x 4 x i32> %val, <vscale x 4 x i32>* %ptrcast
  ret void
}

define void @st1_nxv16i8_bitcast_from_i64(i8* %addr, i64 %off, <vscale x 2 x i64> %val) {
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 2 x i64>*
  store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptrcast
  ret void
}

define void @st1_nxv8i16_trunc8(i8* %addr, i64 %off, <vscale x 8 x i16> %val) {
; CHECK-LABEL: st1_nxv8i16_trunc8:
; CHECK: // %bb.0:
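To reproduce these results locally, the new functions can be checked the same way as the existing SVE addressing-mode tests; the RUN line below is the usual form for such files, though the exact invocation is assumed rather than shown in these hunks:

  ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s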