llvm-project/llvm/test/CodeGen/R600/fmuladd.ll

; Codegen test for the llvm.fmuladd intrinsic on the R600 backend with the SI
; subtarget. The llc output is piped into FileCheck, which matches it against
; the check directives placed above each function in this file.
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s

; Multiply-add intrinsics under test (f32 and f64 overloads).
declare float @llvm.fmuladd.f32(float, float, float)
declare double @llvm.fmuladd.f64(double, double, double)
; Target intrinsic used by the kernels below to compute a per-invocation index.
; NOTE(review): presumably returns the work-item id in the x dimension; confirm.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; NOTE(review): not referenced by any function in the visible part of this file.
declare float @llvm.fabs.f32(float) nounwind readnone

; f32 llvm.fmuladd on three operands loaded from separate pointers. The
; expected selection is a single V_MAD_F32 whose operands are all VGPRs.
; CHECK-LABEL: {{^}}fmuladd_f32:
; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}

define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                         float addrspace(1)* %in2, float addrspace(1)* %in3) {
  ; Fetch the two multiplicands and the addend.
  %mul.lhs = load float addrspace(1)* %in1
  %mul.rhs = load float addrspace(1)* %in2
  %addend = load float addrspace(1)* %in3
  ; result = mul.lhs * mul.rhs + addend
  %result = tail call float @llvm.fmuladd.f32(float %mul.lhs, float %mul.rhs, float %addend)
  store float %result, float addrspace(1)* %out
  ret void
}

; f64 llvm.fmuladd on three operands loaded from separate pointers. The
; expected selection is a single V_FMA_F64 operating on 64-bit VGPR ranges.
; CHECK-LABEL: {{^}}fmuladd_f64:
; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                         double addrspace(1)* %in2, double addrspace(1)* %in3) {
  ; Fetch the two multiplicands and the addend.
  %mul.lhs = load double addrspace(1)* %in1
  %mul.rhs = load double addrspace(1)* %in2
  %addend = load double addrspace(1)* %in3
  ; result = mul.lhs * mul.rhs + addend
  %result = tail call double @llvm.fmuladd.f64(double %mul.lhs, double %mul.rhs, double %addend)
  store double %result, double addrspace(1)* %out
  ret void
}

; fmuladd(2.0, a, b): the constant is the first multiplicand. The expected
; output is one V_MAD_F32 with 2.0 as a literal operand, fed by the two loads.
; Note: %in is unused; a and b are read from %out at indices tid and tid + 1.
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Address computation: element tid, its successor, and the destination slot.
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %ptr.a = getelementptr float addrspace(1)* %out, i32 %idx
  %ptr.b = getelementptr float addrspace(1)* %ptr.a, i32 1
  %ptr.dst = getelementptr float addrspace(1)* %out, i32 %idx

  %a = load float addrspace(1)* %ptr.a
  %b = load float addrspace(1)* %ptr.b

  ; mad = 2.0 * a + b
  %mad = tail call float @llvm.fmuladd.f32(float 2.0, float %a, float %b)
  store float %mad, float addrspace(1)* %ptr.dst
  ret void
}

; fmuladd(a, 2.0, b): the constant is the second multiplicand. The expected
; V_MAD_F32 still prints 2.0 ahead of the loaded value.
; Note: %in is unused; a and b are read from %out at indices tid and tid + 1.
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Address computation: element tid, its successor, and the destination slot.
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %ptr.a = getelementptr float addrspace(1)* %out, i32 %idx
  %ptr.b = getelementptr float addrspace(1)* %ptr.a, i32 1
  %ptr.dst = getelementptr float addrspace(1)* %out, i32 %idx

  %a = load float addrspace(1)* %ptr.a
  %b = load float addrspace(1)* %ptr.b

  ; mad = a * 2.0 + b
  %mad = tail call float @llvm.fmuladd.f32(float %a, float 2.0, float %b)
  store float %mad, float addrspace(1)* %ptr.dst
  ret void
}

; (a + a) + b written as two fadds, with the doubled value on the left of the
; outer add. The expected output is a single V_MAD_F32 computing 2.0 * a + b.
; Note: %in1 and %in2 are unused; a and b are read from %out at indices tid
; and tid + 1, and the result is written back to element tid.
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
                            float addrspace(1)* %in1,
                            float addrspace(1)* %in2) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid

  %r0 = load float addrspace(1)* %gep.0
  %r1 = load float addrspace(1)* %gep.1

  ; a + a is the form a multiply by 2.0 takes before instruction selection.
  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %add.0, %r1
  ; Store through the tid-indexed pointer (previously %gep.out was dead and
  ; every invocation wrote to the base of %out).
  store float %add.1, float addrspace(1)* %gep.out
  ret void
}

; b + (a + a) written as two fadds, with the doubled value on the right of the
; outer add. The expected output is a single V_MAD_F32 computing 2.0 * a + b.
; Note: %in1 and %in2 are unused; a and b are read from %out at indices tid
; and tid + 1, and the result is written back to element tid.
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
                            float addrspace(1)* %in1,
                            float addrspace(1)* %in2) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid

  %r0 = load float addrspace(1)* %gep.0
  %r1 = load float addrspace(1)* %gep.1

  ; Same computation as fadd_a_a_b_f32 with the outer fadd operands swapped.
  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %r1, %add.0
  ; Store through the tid-indexed pointer (previously %gep.out was dead and
  ; every invocation wrote to the base of %out).
  store float %add.1, float addrspace(1)* %gep.out
  ret void
}

; fmuladd(-2.0, a, b): negative constant multiplicand. The expected V_MAD_F32
; prints the loaded value first and -2.0 as the second operand.
; Note: %in is unused; a and b are read from %out at indices tid and tid + 1.
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Address computation: element tid, its successor, and the destination slot.
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %ptr.a = getelementptr float addrspace(1)* %out, i32 %idx
  %ptr.b = getelementptr float addrspace(1)* %ptr.a, i32 1
  %ptr.dst = getelementptr float addrspace(1)* %out, i32 %idx

  %a = load float addrspace(1)* %ptr.a
  %b = load float addrspace(1)* %ptr.b

  ; mad = -2.0 * a + b
  %mad = tail call float @llvm.fmuladd.f32(float -2.0, float %a, float %b)
  store float %mad, float addrspace(1)* %ptr.dst
  ret void
}


; fmuladd(-2.0, -a, b): both multiplicands are negated. The expected
; V_MAD_F32 shows the two negations cancelled: plain 2.0 and the unmodified
; loaded value.
; Note: %in is unused; a and b are read from %out at indices tid and tid + 1.
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Address computation: element tid, its successor, and the destination slot.
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %ptr.a = getelementptr float addrspace(1)* %out, i32 %idx
  %ptr.b = getelementptr float addrspace(1)* %ptr.a, i32 1
  %ptr.dst = getelementptr float addrspace(1)* %out, i32 %idx

  %a = load float addrspace(1)* %ptr.a
  %b = load float addrspace(1)* %ptr.b

  ; Negation spelled as a subtraction from -0.0.
  %a.neg = fsub float -0.000000e+00, %a

  ; mad = -2.0 * (-a) + b
  %mad = tail call float @llvm.fmuladd.f32(float -2.0, float %a.neg, float %b)
  store float %mad, float addrspace(1)* %ptr.dst
  ret void
}


; fmuladd(2.0, -a, b): only the loaded multiplicand is negated. The expected
; V_MAD_F32 carries the sign on the constant instead (-2.0) and uses the
; unmodified loaded value.
; Note: %in is unused; a and b are read from %out at indices tid and tid + 1.
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Address computation: element tid, its successor, and the destination slot.
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %ptr.a = getelementptr float addrspace(1)* %out, i32 %idx
  %ptr.b = getelementptr float addrspace(1)* %ptr.a, i32 1
  %ptr.dst = getelementptr float addrspace(1)* %out, i32 %idx

  %a = load float addrspace(1)* %ptr.a
  %b = load float addrspace(1)* %ptr.b

  ; Negation spelled as a subtraction from -0.0.
  %a.neg = fsub float -0.000000e+00, %a

  ; mad = 2.0 * (-a) + b
  %mad = tail call float @llvm.fmuladd.f32(float 2.0, float %a.neg, float %b)
  store float %mad, float addrspace(1)* %ptr.dst
  ret void
}


; fmuladd(2.0, a, -b): the addend is negated. The expected V_MAD_F32 keeps
; 2.0 and the loaded multiplicand as-is and prints the addend register with a
; leading minus sign.
; Note: %in is unused; a and b are read from %out at indices tid and tid + 1.
; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Address computation: element tid, its successor, and the destination slot.
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %ptr.a = getelementptr float addrspace(1)* %out, i32 %idx
  %ptr.b = getelementptr float addrspace(1)* %ptr.a, i32 1
  %ptr.dst = getelementptr float addrspace(1)* %out, i32 %idx

  %a = load float addrspace(1)* %ptr.a
  %b = load float addrspace(1)* %ptr.b

  ; Negation spelled as a subtraction from -0.0.
  %b.neg = fsub float -0.000000e+00, %b

  ; mad = 2.0 * a + (-b)
  %mad = tail call float @llvm.fmuladd.f32(float 2.0, float %a, float %b.neg)
  store float %mad, float addrspace(1)* %ptr.dst
  ret void
}
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s \| FileCheck %s`
R600/SI: FMA is faster than fmul and fadd for f64 llvm-svn: 188136 2013-08-10 18:38:54 +08:00
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`declare float @llvm.fmuladd.f32(float, float, float)`
			`declare double @llvm.fmuladd.f64(double, double, double)`
			`declare i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`declare float @llvm.fabs.f32(float) nounwind readnone`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_f32:`
R600/SI: Change formatting of printed registers. Print the range of registers used with a single letter prefix. This better matches what the shader compiler produces and is overall less obnoxious than concatenating all of the subregister names together. Instead of SGPR0, it will print s0. Instead of SGPR0_SGPR1, it will print s[0:1] and so on. There doesn't appear to be a straightforward way to get the actual register info in the InstPrinter, so this parses the generated name to print with the new syntax. The required test changes are pretty nasty, and register matching regexes are now worse. Since there isn't a way to add to a variable in FileCheck, some of the tests now don't check the exact number of registers used, but I don't think that will be a real problem. llvm-svn: 194443 2013-11-12 10:35:51 +08:00			`; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}`
R600/SI: FMA is faster than fmul and fadd for f64 llvm-svn: 188136 2013-08-10 18:38:54 +08:00
			`define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,`
			`float addrspace(1)* %in2, float addrspace(1)* %in3) {`
			`%r0 = load float addrspace(1)* %in1`
			`%r1 = load float addrspace(1)* %in2`
			`%r2 = load float addrspace(1)* %in3`
			`%r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)`
			`store float %r3, float addrspace(1)* %out`
			`ret void`
			`}`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_f64:`
R600/SI: Change formatting of printed registers. Print the range of registers used with a single letter prefix. This better matches what the shader compiler produces and is overall less obnoxious than concatenating all of the subregister names together. Instead of SGPR0, it will print s0. Instead of SGPR0_SGPR1, it will print s[0:1] and so on. There doesn't appear to be a straightforward way to get the actual register info in the InstPrinter, so this parses the generated name to print with the new syntax. The required test changes are pretty nasty, and register matching regexes are now worse. Since there isn't a way to add to a variable in FileCheck, some of the tests now don't check the exact number of registers used, but I don't think that will be a real problem. llvm-svn: 194443 2013-11-12 10:35:51 +08:00			`; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}`
R600/SI: FMA is faster than fmul and fadd for f64 llvm-svn: 188136 2013-08-10 18:38:54 +08:00
			`define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,`
			`double addrspace(1)* %in2, double addrspace(1)* %in3) {`
			`%r0 = load double addrspace(1)* %in1`
			`%r1 = load double addrspace(1)* %in2`
			`%r2 = load double addrspace(1)* %in3`
			`%r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)`
			`store double %r3, double addrspace(1)* %out`
			`ret void`
			`}`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32`
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r1 = load float addrspace(1)* %gep.0`
			`%r2 = load float addrspace(1)* %gep.1`

			`%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)`
			`store float %r3, float addrspace(1)* %gep.out`
			`ret void`
			`}`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32`
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r1 = load float addrspace(1)* %gep.0`
			`%r2 = load float addrspace(1)* %gep.1`

			`%r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)`
			`store float %r3, float addrspace(1)* %gep.out`
			`ret void`
			`}`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fadd_a_a_b_f32:`
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fadd_a_a_b_f32(float addrspace(1)* %out,`
			`float addrspace(1)* %in1,`
			`float addrspace(1)* %in2) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r0 = load float addrspace(1)* %gep.0`
			`%r1 = load float addrspace(1)* %gep.1`

			`%add.0 = fadd float %r0, %r0`
			`%add.1 = fadd float %add.0, %r1`
			`store float %add.1, float addrspace(1)* %out`
			`ret void`
			`}`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fadd_b_a_a_f32:`
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fadd_b_a_a_f32(float addrspace(1)* %out,`
			`float addrspace(1)* %in1,`
			`float addrspace(1)* %in2) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r0 = load float addrspace(1)* %gep.0`
			`%r1 = load float addrspace(1)* %gep.1`

			`%add.0 = fadd float %r0, %r0`
			`%add.1 = fadd float %r1, %add.0`
			`store float %add.1, float addrspace(1)* %out`
			`ret void`
			`}`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32`
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r1 = load float addrspace(1)* %gep.0`
			`%r2 = load float addrspace(1)* %gep.1`

			`%r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)`
			`store float %r3, float addrspace(1)* %gep.out`
			`ret void`
			`}`


R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32`
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r1 = load float addrspace(1)* %gep.0`
			`%r2 = load float addrspace(1)* %gep.1`

			`%r1.fneg = fsub float -0.000000e+00, %r1`

			`%r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)`
			`store float %r3, float addrspace(1)* %gep.out`
			`ret void`
			`}`


R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32`
R600/SI: Fix using mad with multiplies by 2 These turn into fadds, so combine them into the target mad node. fadd (fadd (a, a), b) -> mad 2.0, a, b llvm-svn: 218608 2014-09-29 22:59:34 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r1 = load float addrspace(1)* %gep.0`
			`%r2 = load float addrspace(1)* %gep.1`

			`%r1.fneg = fsub float -0.000000e+00, %r1`

			`%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)`
			`store float %r3, float addrspace(1)* %gep.out`
			`ret void`
			`}`
R600/SI: Also fix fsub + fadd a, a to mad combines llvm-svn: 218609 2014-09-29 22:59:38 +08:00

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32`
R600/SI: Also fix fsub + fadd a, a to mad combines llvm-svn: 218609 2014-09-29 22:59:38 +08:00			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4`
			`; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]`
			`; CHECK: BUFFER_STORE_DWORD [[RESULT]]`
			`define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r1 = load float addrspace(1)* %gep.0`
			`%r2 = load float addrspace(1)* %gep.1`

			`%r2.fneg = fsub float -0.000000e+00, %r2`

			`%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)`
			`store float %r3, float addrspace(1)* %gep.out`
			`ret void`
			`}`