forked from OSchip/llvm-project
Mark FMOV constant materialization as being as cheap as a move.
This prevents us from doing things like LICM'ing it out of a loop, which is usually a net loss because we end up having to spill a callee-saved FPR to accommodate it. This does perturb instruction scheduling around this instruction, so a number of tests had to be updated to account for it. Reviewed By: t.p.northover Differential Revision: https://reviews.llvm.org/D87316
This commit is contained in:
parent
b0ae5332dc
commit
3d9c85e4d8
|
@ -3802,7 +3802,7 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
|
|||
// Floating point immediate move.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let isReMaterializable = 1 in {
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
defm FMOV : FPMoveImmediate<"fmov">;
|
||||
}
|
||||
|
||||
|
|
|
@ -90,8 +90,8 @@ declare void @variadic(i32 %a, ...)
|
|||
; others. The extra arguments should go in registers rather than on the stack.
|
||||
define void @test_variadic() {
|
||||
call void(i32, ...) @variadic(i32 0, i64 1, double 2.0)
|
||||
; CHECK: fmov d0, #2.0
|
||||
; CHECK: mov w1, #1
|
||||
; CHECK: fmov d0, #2.0
|
||||
; CHECK: bl variadic
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
|
||||
|
||||
; The purpose of this test is to check that an FMOV instruction that
|
||||
; only materializes an immediate is not MachineLICM'd out of a loop.
|
||||
; We check this in two ways: by looking for the FMOV inside the loop,
|
||||
; and also by checking that we're not spilling any FP callee-saved
|
||||
; registers.
|
||||
|
||||
%struct.Node = type { %struct.Node*, i8* }
|
||||
|
||||
define void @process_nodes(%struct.Node* %0) {
|
||||
; CHECK-LABEL: process_nodes:
|
||||
; CHECK-NOT: stp {{d[0-9]+}}
|
||||
; CHECK-LABEL: .LBB0_2:
|
||||
; CHECK: fmov s0, #1.00000000
|
||||
; CHECK: bl do_it
|
||||
entry:
|
||||
%1 = icmp eq %struct.Node* %0, null
|
||||
br i1 %1, label %exit, label %loop
|
||||
|
||||
loop:
|
||||
%2 = phi %struct.Node* [ %4, %loop ], [ %0, %entry ]
|
||||
tail call void @do_it(float 1.000000e+00, %struct.Node* nonnull %2)
|
||||
%3 = getelementptr inbounds %struct.Node, %struct.Node* %2, i64 0, i32 0
|
||||
%4 = load %struct.Node*, %struct.Node** %3, align 8
|
||||
%5 = icmp eq %struct.Node* %4, null
|
||||
br i1 %5, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @do_it(float, %struct.Node*)
|
|
@ -20,8 +20,8 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
|
|||
%tst2 = icmp sle i64 %lhs64, %rhs64
|
||||
%val2 = select i1 %tst2, double 1.0, double 0.0
|
||||
store double %val2, double* @vardouble
|
||||
; FLT0 is reused from above on ARM64.
|
||||
; CHECK: fmov d[[FLT1:[0-9]+]], #1.0
|
||||
; CHECK-DAG: fmov d[[FLT0:[0-9]+]], xzr
|
||||
; CHECK-DAG: fmov d[[FLT1:[0-9]+]], #1.0
|
||||
; CHECK: fcsel {{d[0-9]+}}, d[[FLT1]], d[[FLT0]], le
|
||||
|
||||
call void @use_float(float 0.0)
|
||||
|
|
|
@ -90,12 +90,10 @@ define void @check_stack_args() {
|
|||
; memcpy gets created, but the following works for now.
|
||||
|
||||
; CHECK-DAG: str {{q[0-9]+}}, [sp]
|
||||
; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
|
||||
; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b
|
||||
; CHECK-DAG: fmov d0, #1.0
|
||||
|
||||
; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp]
|
||||
; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
|
||||
; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]]
|
||||
; CHECK-NONEON-DAG: fmov d0, #1.0
|
||||
|
||||
; CHECK: bl struct_on_stack
|
||||
; CHECK-NOFP-NOT: fmov
|
||||
|
|
|
@ -69,16 +69,14 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
|
|||
; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sub sp, sp, #48 // =48
|
||||
; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: fmov s8, #0.25000000
|
||||
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: mov s0, v0.s[1]
|
||||
; CHECK-NEXT: mov v1.16b, v8.16b
|
||||
; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: fmov s1, #0.25000000
|
||||
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: fmov s1, #0.25000000
|
||||
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mov v1.16b, v8.16b
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
|
||||
|
@ -86,7 +84,7 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
|
|||
; CHECK-NEXT: mov v0.s[1], v1.s[0]
|
||||
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mov v1.16b, v8.16b
|
||||
; CHECK-NEXT: fmov s1, #0.25000000
|
||||
; CHECK-NEXT: mov s0, v0.s[2]
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
|
||||
|
@ -94,12 +92,11 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
|
|||
; CHECK-NEXT: mov v1.s[2], v0.s[0]
|
||||
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: mov v1.16b, v8.16b
|
||||
; CHECK-NEXT: fmov s1, #0.25000000
|
||||
; CHECK-NEXT: mov s0, v0.s[3]
|
||||
; CHECK-NEXT: bl powf
|
||||
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
||||
; CHECK-NEXT: mov v1.s[3], v0.s[0]
|
||||
; CHECK-NEXT: mov v0.16b, v1.16b
|
||||
|
@ -113,21 +110,18 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
|
|||
; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sub sp, sp, #48 // =48
|
||||
; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: fmov d8, #0.25000000
|
||||
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: mov d0, v0.d[1]
|
||||
; CHECK-NEXT: mov v1.16b, v8.16b
|
||||
; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: fmov d1, #0.25000000
|
||||
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: bl pow
|
||||
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: fmov d1, #0.25000000
|
||||
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mov v1.16b, v8.16b
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-NEXT: bl pow
|
||||
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: mov v0.d[1], v1.d[0]
|
||||
; CHECK-NEXT: add sp, sp, #48 // =48
|
||||
|
|
|
@ -339,14 +339,14 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
|
|||
; CHECK-APPLE: malloc
|
||||
|
||||
; First vararg
|
||||
; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16]
|
||||
; CHECK-APPLE-AARCH64: mov [[ID:w[0-9]+]], #1
|
||||
; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16]
|
||||
; CHECK-APPLE-AARCH64: add [[ARGS:x[0-9]+]], [[TMP]], #16
|
||||
; Third vararg
|
||||
; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
|
||||
; CHECK-APPLE-AARCH64: strb [[ID]], [x0, #8]
|
||||
; Second vararg
|
||||
; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24]
|
||||
; Third vararg
|
||||
; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
|
||||
|
||||
; CHECK-APPLE-ARM64_32: mov [[ID:w[0-9]+]], #1
|
||||
; CHECK-APPLE-ARM64_32: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
define float @test1(float* nocapture readonly %arr, i64 %start, float %threshold) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: fmov s2, #-7.00000000
|
||||
; CHECK-NEXT: cbz x1, .LBB0_4
|
||||
; CHECK-NEXT: // %bb.1: // %for.body.preheader
|
||||
; CHECK-NEXT: add x8, x0, #28 // =28
|
||||
|
@ -32,7 +31,7 @@ define float @test1(float* nocapture readonly %arr, i64 %start, float %threshold
|
|||
; CHECK-NEXT: add x1, x1, #1 // =1
|
||||
; CHECK-NEXT: cbnz x1, .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: mov v0.16b, v2.16b
|
||||
; CHECK-NEXT: fmov s0, #-7.00000000
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB0_5: // %cleanup2
|
||||
; CHECK-NEXT: mov v0.16b, v1.16b
|
||||
|
@ -64,23 +63,22 @@ cleanup2: ; preds = %for.cond, %for.body
|
|||
define float @test2(float* nocapture readonly %arr, i64 %start, float %threshold) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: fmov s2, #-7.00000000
|
||||
; CHECK-NEXT: cbz x1, .LBB1_4
|
||||
; CHECK-NEXT: // %bb.1: // %for.body.preheader
|
||||
; CHECK-NEXT: add x8, x0, #28 // =28
|
||||
; CHECK-NEXT: .LBB1_2: // %for.body
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldr s1, [x8, x1, lsl #2]
|
||||
; CHECK-NEXT: scvtf s3, x1
|
||||
; CHECK-NEXT: fadd s3, s3, s0
|
||||
; CHECK-NEXT: fcmp s1, s3
|
||||
; CHECK-NEXT: scvtf s2, x1
|
||||
; CHECK-NEXT: fadd s2, s2, s0
|
||||
; CHECK-NEXT: fcmp s1, s2
|
||||
; CHECK-NEXT: b.gt .LBB1_5
|
||||
; CHECK-NEXT: // %bb.3: // %for.cond
|
||||
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
|
||||
; CHECK-NEXT: add x1, x1, #1 // =1
|
||||
; CHECK-NEXT: cbnz x1, .LBB1_2
|
||||
; CHECK-NEXT: .LBB1_4:
|
||||
; CHECK-NEXT: mov v0.16b, v2.16b
|
||||
; CHECK-NEXT: fmov s0, #-7.00000000
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB1_5: // %cleanup4
|
||||
; CHECK-NEXT: mov v0.16b, v1.16b
|
||||
|
|
Loading…
Reference in New Issue