llvm-project/llvm/test/CodeGen/PowerPC/load-two-flts.ll

; Codegen regression test: the llc output for this file is piped to FileCheck,
; which verifies it against the directives embedded in the function bodies.
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; Big-endian ("E") data layout and a 64-bit PowerPC Linux triple.
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"

; Loads a single i64 from %ref.tmp, reinterprets its upper and lower 32-bit
; halves as two floats, combines them with %v0 and %v1 in a complex-multiply
; pattern, and stores the two results through %_M_value.realp.i.i and
; %_M_value.imagp.i.i.
;
; The FileCheck directives at the end of the body expect the two halves to be
; read with two lfs instructions (offsets 4 and 0 from register 5) instead of
; one 64-bit ld whose halves are split with stw/rldicl.
; NOTE(review): register 5 presumably holds %ref.tmp under the target ABI --
; confirm against the generated assembly.
define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:
  ; One 64-bit integer load carrying both float bit patterns.
  %v2 = load i64, i64* %ref.tmp, align 8
  ; Upper 32 bits of the loaded value, reinterpreted as float %v5.
  %v3 = lshr i64 %v2, 32
  %v4 = trunc i64 %v3 to i32
  %v5 = bitcast i32 %v4 to float
  ; Lower 32 bits of the loaded value, reinterpreted as float %v7.
  %v6 = trunc i64 %v2 to i32
  %v7 = bitcast i32 %v6 to float
  ; %mul_i.i.i = %v5 * %v1 + %v7 * %v0
  %mul_ad.i.i = fmul fast float %v5, %v1
  %mul_bc.i.i = fmul fast float %v7, %v0
  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
  ; %mul_r.i.i = %v5 * %v0 - %v7 * %v1
  %mul_ac.i.i = fmul fast float %v5, %v0
  %mul_bd.i.i = fmul fast float %v7, %v1
  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
  ; Both results are written out so that neither half of the load is dead.
  store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
  store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
  ret void

; Expected output: no 64-bit integer load from 0(5), no stw and no rldicl
; ahead of the two lfs loads (which may appear in either order), then blr.
; CHECK-LABEL: @_Z4testSt7complexIfE
; CHECK-NOT: ld {{[0-9]+}}, 0(5)
; CHECK-NOT: stw
; CHECK-NOT: rldicl
; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
; CHECK-DAG: lfs {{[0-9]+}}, 0(5)
; CHECK: blr
}

; Variant of the two-float load test in which the i64 is loaded from
; %ref.tmp advanced by one element (8 bytes) and the advanced pointer %r is
; also returned. The upper and lower 32-bit halves of the loaded value are
; reinterpreted as floats, combined with %v0 and %v1 in a complex-multiply
; pattern, and stored through %_M_value.realp.i.i and %_M_value.imagp.i.i.
;
; The FileCheck directives at the end of the body expect the address
; increment to be folded into an update-form load (lfsu ..., 8(5)) with the
; other float read by lfs at offset 4, and forbid a 64-bit ld/ldu from 8(5)
; as well as any stw/rldicl ahead of those loads.
; NOTE(review): register 5 presumably holds %ref.tmp under the target ABI --
; confirm against the generated assembly.
define i64* @_Z4testSt7complexIfE_idx(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:
  ; %r = %ref.tmp advanced by one i64 element; it is both the load address
  ; and the function's return value.
  %r = getelementptr i64, i64* %ref.tmp, i64 1
  ; One 64-bit integer load carrying both float bit patterns.
  %v2 = load i64, i64* %r, align 8
  ; Upper 32 bits of the loaded value, reinterpreted as float %v5.
  %v3 = lshr i64 %v2, 32
  %v4 = trunc i64 %v3 to i32
  %v5 = bitcast i32 %v4 to float
  ; Lower 32 bits of the loaded value, reinterpreted as float %v7.
  %v6 = trunc i64 %v2 to i32
  %v7 = bitcast i32 %v6 to float
  ; %mul_i.i.i = %v5 * %v1 + %v7 * %v0
  %mul_ad.i.i = fmul fast float %v5, %v1
  %mul_bc.i.i = fmul fast float %v7, %v0
  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
  ; %mul_r.i.i = %v5 * %v0 - %v7 * %v1
  %mul_ac.i.i = fmul fast float %v5, %v0
  %mul_bd.i.i = fmul fast float %v7, %v1
  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
  ; Both results are written out so that neither half of the load is dead.
  store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
  store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
  ret i64* %r

; The label spells out this function's full name so that these directives
; bind to this function explicitly rather than through a prefix match on the
; shorter name @_Z4testSt7complexIfE.
; CHECK-LABEL: @_Z4testSt7complexIfE_idx
; CHECK-NOT: ld {{[0-9]+}}, 8(5)
; CHECK-NOT: ldu {{[0-9]+}}, 8(5)
; CHECK-NOT: stw
; CHECK-NOT: rldicl
; CHECK-DAG: lfsu {{[0-9]+}}, 8(5)
; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
; CHECK: blr
}
Adding -verify-machineinstrs option to PowerPC tests Currently we have a number of tests that fail with -verify-machineinstrs. To detect these cases earlier we add the option to the testcases with the exception of tests that will currently fail with this option. PR 27456 keeps track of these failures. No code review, as discussed with Hal Finkel. llvm-svn: 277624 2016-08-04 02:17:35 +08:00			`; RUN: llc -verify-machineinstrs < %s \| FileCheck %s`
[PowerPC] Load two floats directly instead of using one 64-bit integer load When dealing with complex<float>, and similar structures with two single-precision floating-point numbers, especially when such things are being passed around by value, we'll sometimes end up loading both float values by extracting them from one 64-bit integer load. It looks like this: t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 t16: i64 = srl t13, Constant:i32<32> t17: i32 = truncate t16 t18: f32 = bitcast t17 t19: i32 = truncate t13 t20: f32 = bitcast t19 The problem, especially before the P8 where those bitcasts aren't legal (and get expanded via the stack), is that it would have been better to use two floating-point loads directly. Here we add a target-specific DAGCombine to do just that. In short, we turn: ld 3, 0(5) stw 3, -8(1) rldicl 3, 3, 32, 32 stw 3, -4(1) lfs 3, -4(1) lfs 0, -8(1) into: lfs 3, 4(5) lfs 0, 0(5) llvm-svn: 264988 2016-03-31 10:56:05 +08:00			`target datalayout = "E-m:e-i64:64-n32:64"`
			`target triple = "powerpc64-bgq-linux"`

			`define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {`
			`entry:`
			`%v2 = load i64, i64* %ref.tmp, align 8`
			`%v3 = lshr i64 %v2, 32`
			`%v4 = trunc i64 %v3 to i32`
			`%v5 = bitcast i32 %v4 to float`
			`%v6 = trunc i64 %v2 to i32`
			`%v7 = bitcast i32 %v6 to float`
			`%mul_ad.i.i = fmul fast float %v5, %v1`
			`%mul_bc.i.i = fmul fast float %v7, %v0`
			`%mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i`
			`%mul_ac.i.i = fmul fast float %v5, %v0`
			`%mul_bd.i.i = fmul fast float %v7, %v1`
			`%mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i`
			`store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4`
			`store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4`
			`ret void`

			`; CHECK-LABEL: @_Z4testSt7complexIfE`
			`; CHECK-NOT: ld {{[0-9]+}}, 0(5)`
			`; CHECK-NOT: stw`
			`; CHECK-NOT: rldicl`
			`; CHECK-DAG: lfs {{[0-9]+}}, 4(5)`
			`; CHECK-DAG: lfs {{[0-9]+}}, 0(5)`
			`; CHECK: blr`
			`}`

			`define i64* @_Z4testSt7complexIfE_idx(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {`
			`entry:`
			`%r = getelementptr i64, i64* %ref.tmp, i64 1`
			`%v2 = load i64, i64* %r, align 8`
			`%v3 = lshr i64 %v2, 32`
			`%v4 = trunc i64 %v3 to i32`
			`%v5 = bitcast i32 %v4 to float`
			`%v6 = trunc i64 %v2 to i32`
			`%v7 = bitcast i32 %v6 to float`
			`%mul_ad.i.i = fmul fast float %v5, %v1`
			`%mul_bc.i.i = fmul fast float %v7, %v0`
			`%mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i`
			`%mul_ac.i.i = fmul fast float %v5, %v0`
			`%mul_bd.i.i = fmul fast float %v7, %v1`
			`%mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i`
			`store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4`
			`store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4`
			`ret i64* %r`

			`; CHECK-LABEL: @_Z4testSt7complexIfE`
			`; CHECK-NOT: ld {{[0-9]+}}, 8(5)`
			`; CHECK-NOT: ldu {{[0-9]+}}, 8(5)`
			`; CHECK-NOT: stw`
			`; CHECK-NOT: rldicl`
			`; CHECK-DAG: lfsu {{[0-9]+}}, 8(5)`
			`; CHECK-DAG: lfs {{[0-9]+}}, 4(5)`
			`; CHECK: blr`
			`}`