[AArch64] Prefer fmov over orr v.16b when copying f32/f64

This changes the lowering of f32 and f64 COPYs from a 128-bit vector ORR to
an fmov of the appropriate type. At least on some CPUs with 64-bit NEON
data paths this is expected to be faster, and it should not be slower on any
CPU that treats fmov as a register rename.

Differential Revision: https://reviews.llvm.org/D106365
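
As a rough illustration of the effect (a hypothetical example written in the
style of the updated tests below; copy_f64 and the BEFORE/AFTER markers are
illustrative comments, not FileCheck prefixes or part of this commit),
returning the second f64 argument forces a d1-to-d0 copy, which previously
lowered to a full 128-bit vector ORR and now becomes a scalar fmov:

define double @copy_f64(double %a, double %b) {
; BEFORE: mov v0.16b, v1.16b    ; alias of orr v0.16b, v1.16b, v1.16b
; AFTER:  fmov d0, d1
  ret double %b
}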
David Green 2021-08-03 17:25:40 +01:00
parent eec96db184
commit bd07c2e266
28 changed files with 216 additions and 261 deletions


@ -2099,10 +2099,8 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
default:
break;
case TargetOpcode::COPY: {
// FPR64 copies will be lowered to ORR.16b
Register DstReg = MI.getOperand(0).getReg();
return (AArch64::FPR64RegClass.contains(DstReg) ||
AArch64::FPR128RegClass.contains(DstReg));
return AArch64::FPR128RegClass.contains(DstReg);
}
case AArch64::ORRv16i8:
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
@ -3503,77 +3501,37 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
&AArch64::FPR128RegClass);
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
&AArch64::FPR128RegClass);
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
&AArch64::FPR128RegClass);
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
&AArch64::FPR32RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
&AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
DestReg =
RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
SrcReg =
RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
&AArch64::FPR128RegClass);
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
&AArch64::FPR32RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
&AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
DestReg =
RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
SrcReg =
RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
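
With this hunk the FPR16 and FPR8 cases no longer widen to the 128-bit Q
registers when NEON is available; they always map to the overlapping FPR32
super-registers and emit a single FMOVSr. A minimal sketch of the resulting
codegen for a half copy (hypothetical function name; the same pattern shows
up in the test_half update further down):

define half @copy_f16(float %unused, half %b) {
; previously (with NEON): mov v0.16b, v1.16b
; now:                    fmov s0, s1
  ret half %b
}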


@ -1,6 +1,4 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {


@ -124,7 +124,7 @@ entry:
; Check that f16 can be passed and returned (ACLE 2.0 extension)
define half @test_half(float, half %arg) {
; CHECK-LABEL: test_half:
; CHECK: mov v0.16b, v1.16b
; CHECK: fmov s0, s1
ret half %arg;
}
@ -138,7 +138,7 @@ define half @test_half_const() {
; Check that v4f16 can be passed and returned in registers
define dso_local <4 x half> @test_v4_half_register(float, <4 x half> %arg) {
; CHECK-LABEL: test_v4_half_register:
; CHECK: mov v0.16b, v1.16b
; CHECK: fmov d0, d1
ret <4 x half> %arg;
}


@ -224,7 +224,7 @@ define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v1.b[7], v0.b[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <16 x i8> %tmp1, i32 2
%tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
@ -236,7 +236,7 @@ define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v1.h[3], v0.h[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <8 x i16> %tmp1, i32 2
%tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
@ -248,7 +248,7 @@ define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v1.s[1], v0.s[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <4 x i32> %tmp1, i32 2
%tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
@ -260,7 +260,7 @@ define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v1.d[0], v0.d[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <2 x i64> %tmp1, i32 0
%tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
@ -272,7 +272,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v1.s[1], v0.s[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <4 x float> %tmp1, i32 2
%tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
@ -296,7 +296,7 @@ define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v1.b[4], v0.b[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <8 x i8> %tmp1, i32 2
%tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
@ -309,7 +309,7 @@ define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v1.h[3], v0.h[2]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <4 x i16> %tmp1, i32 2
%tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
@ -322,7 +322,7 @@ define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v1.s[1], v0.s[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <2 x i32> %tmp1, i32 0
%tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
@ -335,7 +335,7 @@ define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v1.d[0], v0.d[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <1 x i64> %tmp1, i32 0
%tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
@ -348,7 +348,7 @@ define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v1.s[1], v0.s[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%tmp3 = extractelement <2 x float> %tmp1, i32 0
%tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
@ -584,7 +584,7 @@ define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v1.b[7], v0.b[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
ret <8 x i8> %vset_lane
@ -1236,7 +1236,7 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
; CHECK-NEXT: mov v1.h[1], v0.h[1]
; CHECK-NEXT: mov v1.h[2], v0.h[2]
; CHECK-NEXT: mov v1.h[3], v0.h[3]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp = extractelement <8 x i16> %x, i32 %idx
@ -1264,7 +1264,7 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
; CHECK-NEXT: mov v1.h[1], v0.h[1]
; CHECK-NEXT: mov v1.h[2], v0.h[2]
; CHECK-NEXT: mov v1.h[3], v0.h[3]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp = extractelement <8 x i16> %x, i32 0


@ -208,9 +208,9 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
;
; FAST-LABEL: test_vcvt_f16_f32:
; FAST: // %bb.0:
; FAST-NEXT: mov.16b v1, v0
; FAST-NEXT: fmov d1, d0
; FAST-NEXT: // implicit-def: $q0
; FAST-NEXT: mov.16b v0, v1
; FAST-NEXT: fmov d0, d1
; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0
; FAST-NEXT: fcvt h0, s0
; FAST-NEXT: ret
@ -237,9 +237,9 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun
;
; FAST-LABEL: test_vcvt_high_f32_f64:
; FAST: // %bb.0:
; FAST-NEXT: mov.16b v2, v0
; FAST-NEXT: fmov d2, d0
; FAST-NEXT: // implicit-def: $q0
; FAST-NEXT: mov.16b v0, v2
; FAST-NEXT: fmov d0, d2
; FAST-NEXT: fcvtn2 v0.4s, v1.2d
; FAST-NEXT: ret
;
@ -276,9 +276,9 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou
;
; FAST-LABEL: test_vcvtx_high_f32_f64:
; FAST: // %bb.0:
; FAST-NEXT: mov.16b v2, v0
; FAST-NEXT: fmov d2, d0
; FAST-NEXT: // implicit-def: $q0
; FAST-NEXT: mov.16b v0, v2
; FAST-NEXT: fmov d0, d2
; FAST-NEXT: fcvtxn2 v0.4s, v1.2d
; FAST-NEXT: ret
;
@ -313,7 +313,7 @@ define i16 @to_half(float %in) {
; FAST-NEXT: fcvt h1, s0
; FAST-NEXT: // implicit-def: $w0
; FAST-NEXT: fmov s0, w0
; FAST-NEXT: mov.16b v0, v1
; FAST-NEXT: fmov s0, s1
; FAST-NEXT: fmov w0, s0
; FAST-NEXT: // kill: def $w1 killed $w0
; FAST-NEXT: ret


@ -205,7 +205,7 @@ define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: mov.s v1[1], v0[0]
; CHECK-NEXT: mov.16b v0, v1
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%v.0 = insertelement <2 x float> <float 0.000000e+00, float undef>, float %a, i32 1
ret <2 x float> %v.0


@ -4,7 +4,7 @@
define <4 x i16> @v4bf16_to_v4i16(float, <4 x bfloat> %a) nounwind {
; CHECK-LABEL: v4bf16_to_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x bfloat> %a to <4 x i16>
@ -14,7 +14,7 @@ entry:
define <2 x i32> @v4bf16_to_v2i32(float, <4 x bfloat> %a) nounwind {
; CHECK-LABEL: v4bf16_to_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x bfloat> %a to <2 x i32>
@ -24,7 +24,7 @@ entry:
define <1 x i64> @v4bf16_to_v1i64(float, <4 x bfloat> %a) nounwind {
; CHECK-LABEL: v4bf16_to_v1i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x bfloat> %a to <1 x i64>
@ -44,7 +44,7 @@ entry:
define <2 x float> @v4bf16_to_v2float(float, <4 x bfloat> %a) nounwind {
; CHECK-LABEL: v4bf16_to_v2float:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x bfloat> %a to <2 x float>
@ -54,7 +54,7 @@ entry:
define <1 x double> @v4bf16_to_v1double(float, <4 x bfloat> %a) nounwind {
; CHECK-LABEL: v4bf16_to_v1double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x bfloat> %a to <1 x double>
@ -64,7 +64,7 @@ entry:
define double @v4bf16_to_double(float, <4 x bfloat> %a) nounwind {
; CHECK-LABEL: v4bf16_to_double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x bfloat> %a to double
@ -75,7 +75,7 @@ entry:
define <4 x bfloat> @v4i16_to_v4bf16(float, <4 x i16> %a) nounwind {
; CHECK-LABEL: v4i16_to_v4bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x i16> %a to <4 x bfloat>
@ -85,7 +85,7 @@ entry:
define <4 x bfloat> @v2i32_to_v4bf16(float, <2 x i32> %a) nounwind {
; CHECK-LABEL: v2i32_to_v4bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <2 x i32> %a to <4 x bfloat>
@ -95,7 +95,7 @@ entry:
define <4 x bfloat> @v1i64_to_v4bf16(float, <1 x i64> %a) nounwind {
; CHECK-LABEL: v1i64_to_v4bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <1 x i64> %a to <4 x bfloat>
@ -115,7 +115,7 @@ entry:
define <4 x bfloat> @v2float_to_v4bf16(float, <2 x float> %a) nounwind {
; CHECK-LABEL: v2float_to_v4bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <2 x float> %a to <4 x bfloat>
@ -125,7 +125,7 @@ entry:
define <4 x bfloat> @v1double_to_v4bf16(float, <1 x double> %a) nounwind {
; CHECK-LABEL: v1double_to_v4bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <1 x double> %a to <4 x bfloat>
@ -135,7 +135,7 @@ entry:
define <4 x bfloat> @double_to_v4bf16(float, double %a) nounwind {
; CHECK-LABEL: double_to_v4bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast double %a to <4 x bfloat>


@ -150,7 +150,7 @@ define <4 x bfloat> @test_vset_lane_bf16(bfloat %a, <4 x bfloat> %v) nounwind {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT: mov v1.h[1], v0.h[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%vset_lane = insertelement <4 x bfloat> %v, bfloat %a, i32 1


@ -11,7 +11,7 @@ define <2 x i16> @bitcast_v2i16_v2f16(<2 x half> %x) {
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: umov w8, v0.h[1]
; CHECK-NEXT: mov v1.s[1], w8
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%y = bitcast <2 x half> %x to <2 x i16>
ret <2 x i16> %y


@ -664,7 +664,7 @@ define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) {
; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: cinc w0, w19, gt
; CHECK-NEXT: mov w1, #2
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl xoo
; CHECK-NEXT: fmov d0, #-1.00000000
; CHECK-NEXT: fadd d0, d8, d0


@ -144,9 +144,9 @@ define half @test_call(half %a, half %b) #0 {
; CHECK-COMMON-LABEL: test_call_flipped:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: mov.16b v2, v0
; CHECK-COMMON-NEXT: mov.16b v0, v1
; CHECK-COMMON-NEXT: mov.16b v1, v2
; CHECK-COMMON-NEXT: fmov s2, s0
; CHECK-COMMON-NEXT: fmov s0, s1
; CHECK-COMMON-NEXT: fmov s1, s2
; CHECK-COMMON-NEXT: bl {{_?}}test_callee
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
; CHECK-COMMON-NEXT: ret
@ -156,9 +156,9 @@ define half @test_call_flipped(half %a, half %b) #0 {
}
; CHECK-COMMON-LABEL: test_tailcall_flipped:
; CHECK-COMMON-NEXT: mov.16b v2, v0
; CHECK-COMMON-NEXT: mov.16b v0, v1
; CHECK-COMMON-NEXT: mov.16b v1, v2
; CHECK-COMMON-NEXT: fmov s2, s0
; CHECK-COMMON-NEXT: fmov s0, s1
; CHECK-COMMON-NEXT: fmov s1, s2
; CHECK-COMMON-NEXT: b {{_?}}test_callee
define half @test_tailcall_flipped(half %a, half %b) #0 {
%r = tail call half @test_callee(half %b, half %a)
@ -542,11 +542,11 @@ else:
; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0
; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0]
; CHECK-COMMON: [[LOOP:LBB[0-9_]+]]:
; CHECK-COMMON: mov.16b v[[R:[0-9]+]], v[[AB]]
; CHECK-COMMON: fmov s[[R:[0-9]+]], s[[AB]]
; CHECK-COMMON: ldr h[[AB]], [x[[PTR]]]
; CHECK-COMMON: mov x0, x[[PTR]]
; CHECK-COMMON: bl {{_?}}test_dummy
; CHECK-COMMON: mov.16b v0, v[[R]]
; CHECK-COMMON: fmov s0, s[[R]]
; CHECK-COMMON: ret
define half @test_phi(half* %p1) #0 {
entry:


@ -116,11 +116,11 @@ define double @test7(double %a, double %b) nounwind {
; CHECK-NEXT: fmov d2, #-2.00000000
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fadd d8, d0, d1
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl use
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%mul = fmul double %b, -2.000000e+00
@ -132,13 +132,13 @@ define double @test7(double %a, double %b) nounwind {
define float @fadd_const_multiuse_fmf(float %x) {
; CHECK-LABEL: fadd_const_multiuse_fmf:
; CHECK: // %bb.0:
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
; CHECK-DAG: mov [[W42:w[0-9]+]], #1109917696
; CHECK-DAG: fmov [[FP59:s[0-9]+]], [[W59]]
; CHECK-DAG: fmov [[FP42:s[0-9]+]], [[W42]]
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP42]]
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]]
; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]]
; CHECK-NEXT: mov w8, #1109917696
; CHECK-NEXT: mov w9, #1114374144
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: ret
%a1 = fadd float %x, 42.0
%a2 = fadd nsz reassoc float %a1, 17.0
@ -150,13 +150,13 @@ define float @fadd_const_multiuse_fmf(float %x) {
define float @fadd_const_multiuse_attr(float %x) {
; CHECK-LABEL: fadd_const_multiuse_attr:
; CHECK: // %bb.0:
; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]]
; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP17]]
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]]
; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]]
; CHECK-NEXT: mov w8, #1109917696
; CHECK-NEXT: mov w9, #1114374144
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: ret
%a1 = fadd fast float %x, 42.0
%a2 = fadd fast float %a1, 17.0


@ -68,7 +68,7 @@ define double @select_f64(i1 zeroext %c, double %a, double %b) {
; Now test the folding of all compares.
define float @select_fcmp_false(float %x, float %a, float %b) {
; CHECK-LABEL: select_fcmp_false
; CHECK: mov.16b {{v[0-9]+}}, v2
; CHECK: fmov {{s[0-9]+}}, s2
%1 = fcmp ogt float %x, %x
%2 = select i1 %1, float %a, float %b
ret float %2
@ -196,7 +196,7 @@ define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
define float @select_fcmp_true(float %x, float %a, float %b) {
; CHECK-LABEL: select_fcmp_true
; CHECK: mov.16b {{v[0-9]+}}, v1
; CHECK: fmov {{s[0-9]+}}, s1
%1 = fcmp ueq float %x, %x
%2 = select i1 %1, float %a, float %b
ret float %2


@ -138,7 +138,7 @@ define <4 x double> @h_to_d(<4 x half> %a) {
define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) {
; CHECK-COMMON-LABEL: bitcast_i_to_h:
; CHECK-COMMON: mov v0.16b, v1.16b
; CHECK-COMMON: fmov d0, d1
; CHECK-COMMON-NEXT: ret
%2 = bitcast <4 x i16> %a to <4 x half>
ret <4 x half> %2
@ -146,7 +146,7 @@ define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) {
define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
; CHECK-COMMON-LABEL: bitcast_h_to_i:
; CHECK-COMMON: mov v0.16b, v1.16b
; CHECK-COMMON: fmov d0, d1
; CHECK-COMMON-NEXT: ret
%2 = bitcast <4 x half> %a to <4 x i16>
ret <4 x i16> %2


@ -4,7 +4,7 @@
define <4 x i16> @v4f16_to_v4i16(float, <4 x half> %a) #0 {
; CHECK-LABEL: v4f16_to_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x half> %a to <4 x i16>
@ -14,7 +14,7 @@ entry:
define <2 x i32> @v4f16_to_v2i32(float, <4 x half> %a) #0 {
; CHECK-LABEL: v4f16_to_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x half> %a to <2 x i32>
@ -24,7 +24,7 @@ entry:
define <1 x i64> @v4f16_to_v1i64(float, <4 x half> %a) #0 {
; CHECK-LABEL: v4f16_to_v1i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x half> %a to <1 x i64>
@ -44,7 +44,7 @@ entry:
define <2 x float> @v4f16_to_v2float(float, <4 x half> %a) #0 {
; CHECK-LABEL: v4f16_to_v2float:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x half> %a to <2 x float>
@ -54,7 +54,7 @@ entry:
define <1 x double> @v4f16_to_v1double(float, <4 x half> %a) #0 {
; CHECK-LABEL: v4f16_to_v1double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x half> %a to <1 x double>
@ -64,7 +64,7 @@ entry:
define double @v4f16_to_double(float, <4 x half> %a) #0 {
; CHECK-LABEL: v4f16_to_double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x half> %a to double
@ -75,7 +75,7 @@ entry:
define <4 x half> @v4i16_to_v4f16(float, <4 x i16> %a) #0 {
; CHECK-LABEL: v4i16_to_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <4 x i16> %a to <4 x half>
@ -85,7 +85,7 @@ entry:
define <4 x half> @v2i32_to_v4f16(float, <2 x i32> %a) #0 {
; CHECK-LABEL: v2i32_to_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <2 x i32> %a to <4 x half>
@ -95,7 +95,7 @@ entry:
define <4 x half> @v1i64_to_v4f16(float, <1 x i64> %a) #0 {
; CHECK-LABEL: v1i64_to_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <1 x i64> %a to <4 x half>
@ -115,7 +115,7 @@ entry:
define <4 x half> @v2float_to_v4f16(float, <2 x float> %a) #0 {
; CHECK-LABEL: v2float_to_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <2 x float> %a to <4 x half>
@ -125,7 +125,7 @@ entry:
define <4 x half> @v1double_to_v4f16(float, <1 x double> %a) #0 {
; CHECK-LABEL: v1double_to_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast <1 x double> %a to <4 x half>
@ -135,7 +135,7 @@ entry:
define <4 x half> @double_to_v4f16(float, double %a) #0 {
; CHECK-LABEL: double_to_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%1 = bitcast double %a to <4 x half>


@ -148,7 +148,7 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov s8, s0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov w8, #-251658240
; CHECK-NEXT: fmov s0, w8
@ -177,7 +177,7 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov s8, s0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov w8, #-16777216
; CHECK-NEXT: fmov s0, w8
@ -345,7 +345,7 @@ define i100 @test_signed_i100_f64(double %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov x8, #-4170333254945079296
; CHECK-NEXT: fmov d0, x8
@ -374,7 +374,7 @@ define i128 @test_signed_i128_f64(double %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov x8, #-4044232465378705408
; CHECK-NEXT: fmov d0, x8
@ -562,7 +562,7 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov w8, #-251658240
@ -592,7 +592,7 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov w8, #-16777216


@ -1022,7 +1022,7 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov w8, #-251658240
; CHECK-NEXT: mov w9, #1895825407
@ -1089,7 +1089,7 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov w8, #-16777216
; CHECK-NEXT: mov w9, #2130706431
@ -1354,7 +1354,7 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset b10, -64
; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov x8, #-4170333254945079296
; CHECK-NEXT: mov x9, #5053038781909696511
@ -1420,7 +1420,7 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset b10, -64
; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov x8, #-4044232465378705408
; CHECK-NEXT: mov x9, #5179139571476070399
@ -1833,7 +1833,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #-251658240
@ -1851,7 +1851,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x19, xzr, x8, vs
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
@ -1865,7 +1865,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x21, xzr, x8, vs
; CHECK-NEXT: csel x22, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
@ -1878,7 +1878,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x23, xzr, x8, vs
; CHECK-NEXT: csel x24, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
@ -1941,7 +1941,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #-16777216
@ -1959,7 +1959,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x19, xzr, x8, vs
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
@ -1973,7 +1973,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x21, xzr, x8, vs
; CHECK-NEXT: csel x22, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
@ -1986,7 +1986,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x23, xzr, x8, vs
; CHECK-NEXT: csel x24, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti


@ -129,7 +129,7 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov s8, s0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #1904214015
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
@ -152,7 +152,7 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov s8, s0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #2139095039
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
@ -296,7 +296,7 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov x8, #5057542381537067007
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
@ -319,7 +319,7 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov x8, #5183643171103440895
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
@ -481,7 +481,7 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #1904214015
@ -505,7 +505,7 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #2139095039


@ -916,7 +916,7 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #1904214015
; CHECK-NEXT: fcmp s8, #0.0
@ -967,7 +967,7 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #2139095039
; CHECK-NEXT: fcmp s8, #0.0
@ -1187,7 +1187,7 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov x8, #5057542381537067007
; CHECK-NEXT: fcmp d8, #0.0
@ -1237,7 +1237,7 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov x8, #5183643171103440895
; CHECK-NEXT: fcmp d8, #0.0
@ -1579,7 +1579,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1904214015
@ -1591,7 +1591,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov x25, #68719476735
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x19, x25, x10, gt
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: bl __fixunssfti
@ -1602,7 +1602,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x21, x25, x9, gt
; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
@ -1612,7 +1612,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x23, x25, x9, gt
; CHECK-NEXT: csinv x24, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
@ -1665,7 +1665,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #2139095039
@ -1676,7 +1676,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: csel x10, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csinv x19, x10, xzr, le
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: bl __fixunssfti
@ -1687,7 +1687,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csinv x21, x9, xzr, le
; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
@ -1697,7 +1697,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csinv x23, x9, xzr, le
; CHECK-NEXT: csinv x24, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti


@ -220,13 +220,13 @@ declare double @bar()
define double @reassociate_adds_from_calls() {
; CHECK-LABEL: reassociate_adds_from_calls:
; CHECK: bl bar
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: mov v9.16b, v0.16b
; CHECK-NEXT: fmov d9, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: mov v10.16b, v0.16b
; CHECK-NEXT: fmov d10, d0
; CHECK-NEXT: bl bar
; CHECK: fadd d1, d8, d9
; CHECK-NEXT: fadd d0, d10, d0
; CHECK-NEXT: fadd d0, d1, d0
%x0 = call double @bar()
@ -242,11 +242,11 @@ define double @reassociate_adds_from_calls() {
define double @already_reassociated() {
; CHECK-LABEL: already_reassociated:
; CHECK: bl bar
; CHECK-NEXT: mov v8.16b, v0.16b
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: mov v9.16b, v0.16b
; CHECK-NEXT: fmov d9, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: mov v10.16b, v0.16b
; CHECK-NEXT: fmov d10, d0
; CHECK-NEXT: bl bar
; CHECK: fadd d1, d8, d9
; CHECK-NEXT: fadd d0, d10, d0


@ -6,7 +6,7 @@ define <4 x i16> @test_mla0(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d)
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v2.8h, v2.8b, v3.8b
; CHECK-NEXT: umlal v2.8h, v0.8b, v1.8b
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
%vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
@ -22,7 +22,7 @@ define <4 x i16> @test_mla1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d)
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v2.8h, v2.8b, v3.8b
; CHECK-NEXT: smlal v2.8h, v0.8b, v1.8b
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
%vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
@ -38,7 +38,7 @@ define <2 x i32> @test_mla2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h
; CHECK-NEXT: umlal v2.4s, v0.4h, v1.4h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
@ -54,7 +54,7 @@ define <2 x i32> @test_mla3(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v2.4s, v2.4h, v3.4h
; CHECK-NEXT: smlal v2.4s, v0.4h, v1.4h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
@ -70,7 +70,7 @@ define <1 x i64> @test_mla4(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v2.2d, v2.2s, v3.2s
; CHECK-NEXT: umlal v2.2d, v0.2s, v1.2s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
@ -86,7 +86,7 @@ define <1 x i64> @test_mla5(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v2.2d, v2.2s, v3.2s
; CHECK-NEXT: smlal v2.2d, v0.2s, v1.2s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)


@ -6,7 +6,7 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: mla8xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <8 x i8> %A, %B;
%tmp2 = add <8 x i8> %C, %tmp1;
@ -28,7 +28,7 @@ define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK-LABEL: mla4xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <4 x i16> %A, %B;
%tmp2 = add <4 x i16> %C, %tmp1;
@ -50,7 +50,7 @@ define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK-LABEL: mla2xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <2 x i32> %A, %B;
%tmp2 = add <2 x i32> %C, %tmp1;
@ -72,7 +72,7 @@ define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: mls8xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mls v2.8b, v0.8b, v1.8b
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <8 x i8> %A, %B;
%tmp2 = sub <8 x i8> %C, %tmp1;
@ -94,7 +94,7 @@ define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK-LABEL: mls4xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mls v2.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <4 x i16> %A, %B;
%tmp2 = sub <4 x i16> %C, %tmp1;
@ -116,7 +116,7 @@ define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK-LABEL: mls2xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mls v2.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <2 x i32> %A, %B;
%tmp2 = sub <2 x i32> %C, %tmp1;
@ -140,7 +140,7 @@ define <8 x i8> @mls2v8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.8b, v2.8b
; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <8 x i8> %A, %B;
%tmp2 = sub <8 x i8> %tmp1, %C;
@ -164,7 +164,7 @@ define <4 x i16> @mls2v4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.4h, v2.4h
; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <4 x i16> %A, %B;
%tmp2 = sub <4 x i16> %tmp1, %C;
@ -188,7 +188,7 @@ define <2 x i32> @mls2v2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.2s, v2.2s
; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
%tmp1 = mul <2 x i32> %A, %B;
%tmp2 = sub <2 x i32> %tmp1, %C;


@ -8,12 +8,12 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) {
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlv h1, v0.16b
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
Entry:
@ -34,21 +34,21 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) {
; CHECK-NEXT: ldr x9, [x0, #24]
; CHECK-NEXT: ldr d1, [x0, #16]
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlv h1, v0.16b
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlv h1, v0.16b
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: ret
@ -71,7 +71,7 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
; CHECK-NEXT: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlv h1, v0.16b
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: // kill: def $x0 killed $w0
; CHECK-NEXT: movi v0.2d, #0000000000000000


@ -518,7 +518,7 @@ define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwin
; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: fcsel d0, d0, d1, eq
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%sqrt = call fast double @llvm.sqrt.f64(double %x)
store double %sqrt, double* %p


@ -234,9 +234,9 @@ define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) {
}
; CHECK-LABEL: _gen10
; CHECK: mov.16b v1, v0
; CHECK: mov.16b v2, v0
; CHECK: mov.16b v3, v0
; CHECK: fmov d1, d0
; CHECK: fmov d2, d0
; CHECK: fmov d3, d0
; CHECK: mov w1, w0
; CHECK: mov w2, w0
; CHECK: mov w3, w0
@ -278,7 +278,7 @@ declare swiftcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11()
; CHECK-LABEL: _test12
; CHECK: fadd.4s v0, v0, v1
; CHECK: fadd.4s v0, v0, v2
; CHECK: mov.16b v1, v3
; CHECK: fmov s1, s3
define swiftcc { <4 x float>, float } @test12() #0 {
entry:
%call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12()


@ -171,7 +171,7 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; CHECK-NEXT: mov v1.h[1], w10
; CHECK-NEXT: mov v1.h[2], w9
; CHECK-NEXT: mov v1.h[3], w8
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
ret <4 x i16> %1
@ -208,7 +208,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; CHECK-NEXT: mov v1.h[2], w8
; CHECK-NEXT: msub w8, w11, w9, w10
; CHECK-NEXT: mov v1.h[3], w8
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
ret <4 x i16> %1


@ -145,31 +145,31 @@ define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
; CHECK-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v4.16b
; CHECK-NEXT: mov v9.16b, v3.16b
; CHECK-NEXT: mov v10.16b, v2.16b
; CHECK-NEXT: mov v11.16b, v1.16b
; CHECK-NEXT: fmov s8, s4
; CHECK-NEXT: fmov s9, s3
; CHECK-NEXT: fmov s10, s2
; CHECK-NEXT: fmov s11, s1
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v12.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v11.16b
; CHECK-NEXT: fmov s12, s0
; CHECK-NEXT: fmov s0, s11
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v11.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v10.16b
; CHECK-NEXT: fmov s11, s0
; CHECK-NEXT: fmov s0, s10
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v10.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v9.16b
; CHECK-NEXT: fmov s10, s0
; CHECK-NEXT: fmov s0, s9
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v9.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s9, s0
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v1.16b, v11.16b
; CHECK-NEXT: mov v2.16b, v10.16b
; CHECK-NEXT: mov v3.16b, v9.16b
; CHECK-NEXT: fmov s1, s11
; CHECK-NEXT: fmov s2, s10
; CHECK-NEXT: fmov s3, s9
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: mov v4.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v12.16b
; CHECK-NEXT: fmov s4, s0
; CHECK-NEXT: fmov s0, s12
; CHECK-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload
; CHECK-NEXT: ret
%r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x)
@ -183,36 +183,36 @@ define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v5.16b
; CHECK-NEXT: mov v9.16b, v4.16b
; CHECK-NEXT: mov v10.16b, v3.16b
; CHECK-NEXT: mov v11.16b, v2.16b
; CHECK-NEXT: mov v12.16b, v1.16b
; CHECK-NEXT: fmov s8, s5
; CHECK-NEXT: fmov s9, s4
; CHECK-NEXT: fmov s10, s3
; CHECK-NEXT: fmov s11, s2
; CHECK-NEXT: fmov s12, s1
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v13.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v12.16b
; CHECK-NEXT: fmov s13, s0
; CHECK-NEXT: fmov s0, s12
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v12.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v11.16b
; CHECK-NEXT: fmov s12, s0
; CHECK-NEXT: fmov s0, s11
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v11.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v10.16b
; CHECK-NEXT: fmov s11, s0
; CHECK-NEXT: fmov s0, s10
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v10.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v9.16b
; CHECK-NEXT: fmov s10, s0
; CHECK-NEXT: fmov s0, s9
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v9.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov s9, s0
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl sinf
; CHECK-NEXT: mov v2.16b, v11.16b
; CHECK-NEXT: mov v3.16b, v10.16b
; CHECK-NEXT: mov v4.16b, v9.16b
; CHECK-NEXT: fmov s2, s11
; CHECK-NEXT: fmov s3, s10
; CHECK-NEXT: fmov s4, s9
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: mov v5.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v13.16b
; CHECK-NEXT: mov v1.16b, v12.16b
; CHECK-NEXT: fmov s5, s0
; CHECK-NEXT: fmov s0, s13
; CHECK-NEXT: fmov s1, s12
; CHECK-NEXT: ldp d13, d12, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: ret
%r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
@ -225,20 +225,20 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT: mov v8.16b, v2.16b
; CHECK-NEXT: mov v9.16b, v1.16b
; CHECK-NEXT: fmov d8, d2
; CHECK-NEXT: fmov d9, d1
; CHECK-NEXT: bl sin
; CHECK-NEXT: mov v10.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v9.16b
; CHECK-NEXT: fmov d10, d0
; CHECK-NEXT: fmov d0, d9
; CHECK-NEXT: bl sin
; CHECK-NEXT: mov v9.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: fmov d9, d0
; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: bl sin
; CHECK-NEXT: mov v1.16b, v9.16b
; CHECK-NEXT: fmov d1, d9
; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: mov v2.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v10.16b
; CHECK-NEXT: fmov d2, d0
; CHECK-NEXT: fmov d0, d10
; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
%r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x)


@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
; Test LSR for giving small constants, which get re-associated as unfolded
@ -34,7 +33,7 @@ define float @test1(float* nocapture readonly %arr, i64 %start, float %threshold
; CHECK-NEXT: fmov s0, #-7.00000000
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_5: // %cleanup2
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: ret
entry:
%cmp11 = icmp eq i64 %start, 0
@ -81,7 +80,7 @@ define float @test2(float* nocapture readonly %arr, i64 %start, float %threshold
; CHECK-NEXT: fmov s0, #-7.00000000
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_5: // %cleanup4
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: ret
entry:
%cmp14 = icmp eq i64 %start, 0