[AArch64] Allow copying of SVE registers in Streaming SVE

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D118562
This commit is contained in:
Cullen Rhodes 2022-03-03 09:20:22 +00:00
parent 616586794b
commit e4fa8291a2
4 changed files with 64 additions and 53 deletions

View File

@ -3462,7 +3462,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Predicate register by ORRing with itself.
if (AArch64::PPRRegClass.contains(DestReg) &&
AArch64::PPRRegClass.contains(SrcReg)) {
assert(Subtarget.hasSVE() && "Unexpected SVE register.");
assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
"Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
.addReg(SrcReg) // Pg
.addReg(SrcReg)
@ -3473,7 +3474,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register by ORRing with itself.
if (AArch64::ZPRRegClass.contains(DestReg) &&
AArch64::ZPRRegClass.contains(SrcReg)) {
assert(Subtarget.hasSVE() && "Unexpected SVE register.");
assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
"Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@ -3483,6 +3485,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register pair by copying the individual sub-registers.
if (AArch64::ZPR2RegClass.contains(DestReg) &&
AArch64::ZPR2RegClass.contains(SrcReg)) {
assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
"Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
@ -3492,6 +3496,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register triple by copying the individual sub-registers.
if (AArch64::ZPR3RegClass.contains(DestReg) &&
AArch64::ZPR3RegClass.contains(SrcReg)) {
assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
"Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@ -3502,6 +3508,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register quad by copying the individual sub-registers.
if (AArch64::ZPR4RegClass.contains(DestReg) &&
AArch64::ZPR4RegClass.contains(SrcReg)) {
assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
"Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2, AArch64::zsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,

View File

@ -1,10 +1,11 @@
; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
; RUN: llc -mtriple aarch64 -mattr=+streaming-sve -asm-verbose=1 < %s | FileCheck %s
;
; SVCREATE2 (i8)
;
define <vscale x 16 x i8> @test_svcreate2_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) local_unnamed_addr #0 {
define <vscale x 16 x i8> @test_svcreate2_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s8_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -17,7 +18,7 @@ L2:
ret <vscale x 16 x i8> %extract
}
define <vscale x 16 x i8> @test_svcreate2_s8_vec1(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) local_unnamed_addr #0 {
define <vscale x 16 x i8> @test_svcreate2_s8_vec1(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s8_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -35,7 +36,7 @@ L2:
; SVCREATE2 (i16)
;
define <vscale x 8 x i16> @test_svcreate2_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) local_unnamed_addr #0 {
define <vscale x 8 x i16> @test_svcreate2_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -48,7 +49,7 @@ L2:
ret <vscale x 8 x i16> %extract
}
define <vscale x 8 x i16> @test_svcreate2_s16_vec1(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) local_unnamed_addr #0 {
define <vscale x 8 x i16> @test_svcreate2_s16_vec1(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s16_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -66,7 +67,7 @@ L2:
; SVCREATE2 (half)
;
define <vscale x 8 x half> @test_svcreate2_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) local_unnamed_addr #0 {
define <vscale x 8 x half> @test_svcreate2_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
; CHECK-LABEL: test_svcreate2_f16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -79,7 +80,7 @@ L2:
ret <vscale x 8 x half> %extract
}
define <vscale x 8 x half> @test_svcreate2_f16_vec1(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) local_unnamed_addr #0 {
define <vscale x 8 x half> @test_svcreate2_f16_vec1(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
; CHECK-LABEL: test_svcreate2_f16_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -97,7 +98,7 @@ L2:
; SVCREATE2 (bfloat)
;
define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #1 {
define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
; CHECK-LABEL: test_svcreate2_bf16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -110,7 +111,7 @@ L2:
ret <vscale x 8 x bfloat> %extract
}
define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #1 {
define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
; CHECK-LABEL: test_svcreate2_bf16_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -128,7 +129,7 @@ L2:
; SVCREATE2 (i32)
;
define <vscale x 4 x i32> @test_svcreate2_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) local_unnamed_addr #0 {
define <vscale x 4 x i32> @test_svcreate2_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s32_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -141,7 +142,7 @@ L2:
ret <vscale x 4 x i32> %extract
}
define <vscale x 4 x i32> @test_svcreate2_s32_vec1(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) local_unnamed_addr #0 {
define <vscale x 4 x i32> @test_svcreate2_s32_vec1(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s32_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -159,7 +160,7 @@ L2:
; SVCREATE2 (float)
;
define <vscale x 4 x float> @test_svcreate2_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) local_unnamed_addr #0 {
define <vscale x 4 x float> @test_svcreate2_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
; CHECK-LABEL: test_svcreate2_f32_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -172,7 +173,7 @@ L2:
ret <vscale x 4 x float> %extract
}
define <vscale x 4 x float> @test_svcreate2_f32_vec1(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) local_unnamed_addr #0 {
define <vscale x 4 x float> @test_svcreate2_f32_vec1(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
; CHECK-LABEL: test_svcreate2_f32_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -190,7 +191,7 @@ L2:
; SVCREATE2 (i64)
;
define <vscale x 2 x i64> @test_svcreate2_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) local_unnamed_addr #0 {
define <vscale x 2 x i64> @test_svcreate2_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s64_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -203,7 +204,7 @@ L2:
ret <vscale x 2 x i64> %extract
}
define <vscale x 2 x i64> @test_svcreate2_s64_vec1(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) local_unnamed_addr #0 {
define <vscale x 2 x i64> @test_svcreate2_s64_vec1(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
; CHECK-LABEL: test_svcreate2_s64_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -221,7 +222,7 @@ L2:
; SVCREATE2 (double)
;
define <vscale x 2 x double> @test_svcreate2_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) local_unnamed_addr #0 {
define <vscale x 2 x double> @test_svcreate2_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
; CHECK-LABEL: test_svcreate2_f64_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -234,7 +235,7 @@ L2:
ret <vscale x 2 x double> %extract
}
define <vscale x 2 x double> @test_svcreate2_f64_vec1(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) local_unnamed_addr #0 {
define <vscale x 2 x double> @test_svcreate2_f64_vec1(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
; CHECK-LABEL: test_svcreate2_f64_vec1:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z1.d
@ -252,7 +253,7 @@ L2:
; SVCREATE3 (i8)
;
define <vscale x 16 x i8> @test_svcreate3_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) local_unnamed_addr #0 {
define <vscale x 16 x i8> @test_svcreate3_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s8_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -265,7 +266,7 @@ L2:
ret <vscale x 16 x i8> %extract
}
define <vscale x 16 x i8> @test_svcreate3_s8_vec2(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) local_unnamed_addr #0 {
define <vscale x 16 x i8> @test_svcreate3_s8_vec2(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s8_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -283,7 +284,7 @@ L2:
; SVCREATE3 (i16)
;
define <vscale x 8 x i16> @test_svcreate3_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) local_unnamed_addr #0 {
define <vscale x 8 x i16> @test_svcreate3_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -296,7 +297,7 @@ L2:
ret <vscale x 8 x i16> %extract
}
define <vscale x 8 x i16> @test_svcreate3_s16_vec2(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) local_unnamed_addr #0 {
define <vscale x 8 x i16> @test_svcreate3_s16_vec2(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s16_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -314,7 +315,7 @@ L2:
; SVCREATE3 (half)
;
define <vscale x 8 x half> @test_svcreate3_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) local_unnamed_addr #0 {
define <vscale x 8 x half> @test_svcreate3_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
; CHECK-LABEL: test_svcreate3_f16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -327,7 +328,7 @@ L2:
ret <vscale x 8 x half> %extract
}
define <vscale x 8 x half> @test_svcreate3_f16_vec2(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) local_unnamed_addr #0 {
define <vscale x 8 x half> @test_svcreate3_f16_vec2(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
; CHECK-LABEL: test_svcreate3_f16_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -345,7 +346,7 @@ L2:
; SVCREATE3 (bfloat)
;
define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #1 {
define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
; CHECK-LABEL: test_svcreate3_bf16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -358,7 +359,7 @@ L2:
ret <vscale x 8 x bfloat> %extract
}
define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #1 {
define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
; CHECK-LABEL: test_svcreate3_bf16_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -376,7 +377,7 @@ L2:
; SVCREATE3 (i32)
;
define <vscale x 4 x i32> @test_svcreate3_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) local_unnamed_addr #0 {
define <vscale x 4 x i32> @test_svcreate3_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s32_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -389,7 +390,7 @@ L2:
ret <vscale x 4 x i32> %extract
}
define <vscale x 4 x i32> @test_svcreate3_s32_vec2(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) local_unnamed_addr #0 {
define <vscale x 4 x i32> @test_svcreate3_s32_vec2(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s32_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -407,7 +408,7 @@ L2:
; SVCREATE3 (float)
;
define <vscale x 4 x float> @test_svcreate3_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) local_unnamed_addr #0 {
define <vscale x 4 x float> @test_svcreate3_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
; CHECK-LABEL: test_svcreate3_f32_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -420,7 +421,7 @@ L2:
ret <vscale x 4 x float> %extract
}
define <vscale x 4 x float> @test_svcreate3_f32_vec2(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) local_unnamed_addr #0 {
define <vscale x 4 x float> @test_svcreate3_f32_vec2(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
; CHECK-LABEL: test_svcreate3_f32_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -438,7 +439,7 @@ L2:
; SVCREATE3 (i64)
;
define <vscale x 2 x i64> @test_svcreate3_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) local_unnamed_addr #0 {
define <vscale x 2 x i64> @test_svcreate3_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s64_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -451,7 +452,7 @@ L2:
ret <vscale x 2 x i64> %extract
}
define <vscale x 2 x i64> @test_svcreate3_s64_vec2(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) local_unnamed_addr #0 {
define <vscale x 2 x i64> @test_svcreate3_s64_vec2(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
; CHECK-LABEL: test_svcreate3_s64_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -469,7 +470,7 @@ L2:
; SVCREATE3 (double)
;
define <vscale x 2 x double> @test_svcreate3_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) local_unnamed_addr #0 {
define <vscale x 2 x double> @test_svcreate3_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
; CHECK-LABEL: test_svcreate3_f64_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -482,7 +483,7 @@ L2:
ret <vscale x 2 x double> %extract
}
define <vscale x 2 x double> @test_svcreate3_f64_vec2(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) local_unnamed_addr #0 {
define <vscale x 2 x double> @test_svcreate3_f64_vec2(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
; CHECK-LABEL: test_svcreate3_f64_vec2:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z2.d
@ -500,7 +501,7 @@ L2:
; SVCREATE4 (i8)
;
define <vscale x 16 x i8> @test_svcreate4_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) local_unnamed_addr #0 {
define <vscale x 16 x i8> @test_svcreate4_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s8_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -513,7 +514,7 @@ L2:
ret <vscale x 16 x i8> %extract
}
define <vscale x 16 x i8> @test_svcreate4_s8_vec3(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) local_unnamed_addr #0 {
define <vscale x 16 x i8> @test_svcreate4_s8_vec3(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s8_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -531,7 +532,7 @@ L2:
; SVCREATE4 (i16)
;
define <vscale x 8 x i16> @test_svcreate4_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) local_unnamed_addr #0 {
define <vscale x 8 x i16> @test_svcreate4_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -544,7 +545,7 @@ L2:
ret <vscale x 8 x i16> %extract
}
define <vscale x 8 x i16> @test_svcreate4_s16_vec3(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) local_unnamed_addr #0 {
define <vscale x 8 x i16> @test_svcreate4_s16_vec3(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s16_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -562,7 +563,7 @@ L2:
; SVCREATE4 (half)
;
define <vscale x 8 x half> @test_svcreate4_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) local_unnamed_addr #0 {
define <vscale x 8 x half> @test_svcreate4_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
; CHECK-LABEL: test_svcreate4_f16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -575,7 +576,7 @@ L2:
ret <vscale x 8 x half> %extract
}
define <vscale x 8 x half> @test_svcreate4_f16_vec3(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) local_unnamed_addr #0 {
define <vscale x 8 x half> @test_svcreate4_f16_vec3(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
; CHECK-LABEL: test_svcreate4_f16_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -593,7 +594,7 @@ L2:
; SVCREATE4 (bfloat)
;
define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #1 {
define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
; CHECK-LABEL: test_svcreate4_bf16_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -606,7 +607,7 @@ L2:
ret <vscale x 8 x bfloat> %extract
}
define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #1 {
define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
; CHECK-LABEL: test_svcreate4_bf16_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -624,7 +625,7 @@ L2:
; SVCREATE4 (i32)
;
define <vscale x 4 x i32> @test_svcreate4_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) local_unnamed_addr #0 {
define <vscale x 4 x i32> @test_svcreate4_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s32_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -637,7 +638,7 @@ L2:
ret <vscale x 4 x i32> %extract
}
define <vscale x 4 x i32> @test_svcreate4_s32_vec3(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) local_unnamed_addr #0 {
define <vscale x 4 x i32> @test_svcreate4_s32_vec3(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s32_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -655,7 +656,7 @@ L2:
; SVCREATE4 (float)
;
define <vscale x 4 x float> @test_svcreate4_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) local_unnamed_addr #0 {
define <vscale x 4 x float> @test_svcreate4_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: test_svcreate4_f32_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -668,7 +669,7 @@ L2:
ret <vscale x 4 x float> %extract
}
define <vscale x 4 x float> @test_svcreate4_f32_vec3(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) local_unnamed_addr #0 {
define <vscale x 4 x float> @test_svcreate4_f32_vec3(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: test_svcreate4_f32_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -686,7 +687,7 @@ L2:
; SVCREATE4 (i64)
;
define <vscale x 2 x i64> @test_svcreate4_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) local_unnamed_addr #0 {
define <vscale x 2 x i64> @test_svcreate4_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s64_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -699,7 +700,7 @@ L2:
ret <vscale x 2 x i64> %extract
}
define <vscale x 2 x i64> @test_svcreate4_s64_vec3(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) local_unnamed_addr #0 {
define <vscale x 2 x i64> @test_svcreate4_s64_vec3(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: test_svcreate4_s64_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -717,7 +718,7 @@ L2:
; SVCREATE4 (double)
;
define <vscale x 2 x double> @test_svcreate4_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) local_unnamed_addr #0 {
define <vscale x 2 x double> @test_svcreate4_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: test_svcreate4_f64_vec0:
; CHECK: // %L2
; CHECK-NEXT: ret
@ -730,7 +731,7 @@ L2:
ret <vscale x 2 x double> %extract
}
define <vscale x 2 x double> @test_svcreate4_f64_vec3(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) local_unnamed_addr #0 {
define <vscale x 2 x double> @test_svcreate4_f64_vec3(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: test_svcreate4_f64_vec3:
; CHECK: // %L2
; CHECK-NEXT: mov z0.d, z3.d
@ -744,9 +745,9 @@ L2:
ret <vscale x 2 x double> %extract
}
attributes #0 = { nounwind "target-features"="+sve" }
attributes #0 = { nounwind }
; +bf16 is required for the bfloat version.
attributes #1 = { nounwind "target-features"="+sve,+bf16" }
attributes #1 = { nounwind "target-features"="+bf16" }
declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

View File

@ -1,4 +1,5 @@
; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -mtriple aarch64 -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s
; All these tests create a vector tuple, insert z5 into one of the elements,
; and finally extract that element from the wide vector to return it. These
@ -228,7 +229,7 @@ define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple)
ret <vscale x 4 x i32> %ext
}
attributes #0 = { nounwind "target-features"="+sve" }
attributes #0 = { nounwind }
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)

View File

@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s
;
; BRKA