llvm-project/llvm/test/CodeGen/R600/commute_modifiers.ll

; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Intrinsics called by the test functions in this file.
; Every test uses the result of this call as an element index into a buffer
; (presumably the work-item id in the x dimension -- confirm against the
; target's intrinsic definitions).
declare i32 @llvm.r600.read.tidig.x() #1
; Floating-point absolute value; the tests use it to create an abs source
; modifier on an operand.
declare float @llvm.fabs.f32(float) #1
; Fused multiply-add; called only by @fma_a_2.0_neg_b_f32.
declare float @llvm.fma.f32(float, float, float) nounwind readnone

; FUNC-LABEL: @commute_add_imm_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: stores 2.0 + fabs(in[id]) to %out, where id is the value
; returned by the tidig.x intrinsic. The constant is the first fadd operand
; and the fabs result is the second.
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %x = load float addrspace(1)* %x.ptr
  %x.abs = call float @llvm.fabs.f32(float %x) #1
  %sum = fadd float 2.0, %x.abs
  store float %sum, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: stores 4.0 * -(fabs(in[id])) to %out. The negation is written
; as an fsub from -0.0. The expected multiply above carries the sign on the
; constant (-4.0) and keeps only the abs modifier on the loaded value.
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %x = load float addrspace(1)* %x.ptr
  %x.abs = call float @llvm.fabs.f32(float %x) #1
  %x.abs.neg = fsub float -0.0, %x.abs
  %prod = fmul float 4.0, %x.abs.neg
  store float %prod, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @commute_mul_imm_fneg_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: stores 4.0 * -(in[id]) to %out, with the negation written as
; an fsub from -0.0. The expected multiply above uses the constant -4.0 and
; the loaded value without any modifier.
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %x = load float addrspace(1)* %x.ptr
  %x.neg = fsub float -0.0, %x
  %prod = fmul float 4.0, %x.neg
  store float %prod, float addrspace(1)* %out
  ret void
}

; FIXME: Should use SGPR for literal.
; FUNC-LABEL: @commute_add_lit_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: stores 1024.0 + fabs(in[id]) to %out. The checks above expect
; the constant (0x44800000) to be moved into a vector register first and
; then used as the second source of the add.
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %x = load float addrspace(1)* %x.ptr
  %x.abs = call float @llvm.fabs.f32(float %x) #1
  %sum = fadd float 1024.0, %x.abs
  store float %sum, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @commute_add_fabs_f32
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: loads x = in[id] and y = in[id + 1], then stores x + fabs(y)
; to %out. Only the second fadd operand carries an abs.
define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %y.ptr = getelementptr float addrspace(1)* %x.ptr, i32 1
  %x = load float addrspace(1)* %x.ptr
  %y = load float addrspace(1)* %y.ptr
  %y.abs = call float @llvm.fabs.f32(float %y) #1
  %sum = fadd float %x, %y.abs
  store float %sum, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @commute_mul_fneg_f32
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: loads x = in[id] and y = in[id + 1], then stores x * -y to
; %out. The negation is written as an fsub from -0.0.
define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %y.ptr = getelementptr float addrspace(1)* %x.ptr, i32 1
  %x = load float addrspace(1)* %x.ptr
  %y = load float addrspace(1)* %y.ptr
  %y.neg = fsub float -0.0, %y
  %prod = fmul float %x, %y.neg
  store float %prod, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @commute_mul_fabs_fneg_f32
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: loads x = in[id] and y = in[id + 1], then stores
; x * -(fabs(y)) to %out. Both modifiers end up on the second operand in the
; expected multiply above.
define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %y.ptr = getelementptr float addrspace(1)* %x.ptr, i32 1
  %x = load float addrspace(1)* %x.ptr
  %y = load float addrspace(1)* %y.ptr
  %y.abs = call float @llvm.fabs.f32(float %y) #1
  %y.abs.neg = fsub float -0.0, %y.abs
  %prod = fmul float %x, %y.abs.neg
  store float %prod, float addrspace(1)* %out
  ret void
}

; There's no reason to commute this.
; FUNC-LABEL: @commute_mul_fabs_x_fabs_y_f32
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: loads x = in[id] and y = in[id + 1], then stores
; fabs(x) * fabs(y) to %out. Both operands carry an abs, and the expected
; multiply above keeps them in source order.
define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %y.ptr = getelementptr float addrspace(1)* %x.ptr, i32 1
  %x = load float addrspace(1)* %x.ptr
  %y = load float addrspace(1)* %y.ptr
  %x.abs = call float @llvm.fabs.f32(float %x) #1
  %y.abs = call float @llvm.fabs.f32(float %y) #1
  %prod = fmul float %x.abs, %y.abs
  store float %prod, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @commute_mul_fabs_x_fneg_fabs_y_f32
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
; SI-NEXT: buffer_store_dword [[REG]]
; Test input: loads x = in[id] and y = in[id + 1], then stores
; fabs(x) * -(fabs(y)) to %out. The first operand carries an abs; the second
; carries both an abs and a negation.
define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x() #1
  %x.ptr = getelementptr float addrspace(1)* %in, i32 %lane
  %y.ptr = getelementptr float addrspace(1)* %x.ptr, i32 1
  %x = load float addrspace(1)* %x.ptr
  %y = load float addrspace(1)* %y.ptr
  %x.abs = call float @llvm.fabs.f32(float %x) #1
  %y.abs = call float @llvm.fabs.f32(float %y) #1
  %y.abs.neg = fsub float -0.0, %y.abs
  %prod = fmul float %x.abs, %y.abs.neg
  store float %prod, float addrspace(1)* %out
  ret void
}

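; Make sure we commute the multiply part for the constant in src0 even
; though we have a source modifier on src2. Note that, despite the "neg" in
; the function name, the modifier exercised here is abs: the test applies
; fabs to the addend and expects |R2| in the output.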

; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32
; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
; SI: buffer_store_dword [[RESULT]]
; Test input: stores fma(r1, 2.0, fabs(r2)) to out[id], where r1 = out[id]
; and r2 = out[id + 1]. Both loads and the store go through %out; %in is not
; referenced by the body. The constant 2.0 is the second fma operand in the
; IR, while the expected instruction above lists it first.
define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %lane = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %r1.ptr = getelementptr float addrspace(1)* %out, i32 %lane
  %r2.ptr = getelementptr float addrspace(1)* %r1.ptr, i32 1
  %res.ptr = getelementptr float addrspace(1)* %out, i32 %lane

  %r1 = load float addrspace(1)* %r1.ptr
  %r2 = load float addrspace(1)* %r2.ptr

  ; abs of the addend (the third fma operand).
  %r2.abs = call float @llvm.fabs.f32(float %r2)

  %res = tail call float @llvm.fma.f32(float %r1, float 2.0, float %r2.abs)
  store float %res, float addrspace(1)* %res.ptr
  ret void
}

; Group #0 is attached to every commute_* test function definition above
; (@fma_a_2.0_neg_b_f32 does not reference it).
attributes #0 = { nounwind }
; Group #1 is attached to the tidig.x and fabs intrinsic declarations and to
; their call sites inside the commute_* functions.
attributes #1 = { nounwind readnone }
R600/SI: Add a stub GCNTargetMachine This is equivalent to the AMDGPUTargetMachine now, but it is the starting point for separating R600 and GCN functionality into separate targets. It is recommended that users start using the gcn triple for GCN-based GPUs, because using the r600 triple for these GPUs will be deprecated in the future. llvm-svn: 225277 2015-01-07 02:00:21 +08:00			`; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s`
R600/SI: Allow commuting with source modifiers llvm-svn: 220066 2014-10-18 02:00:48 +08:00
			`declare i32 @llvm.r600.read.tidig.x() #1`
			`declare float @llvm.fabs.f32(float) #1`
R600/SI: Allow commuting with src2_modifiers llvm-svn: 221911 2014-11-14 03:26:50 +08:00			`declare float @llvm.fma.f32(float, float, float) nounwind readnone`
R600/SI: Allow commuting with source modifiers llvm-svn: 220066 2014-10-18 02:00:48 +08:00
			`; FUNC-LABEL: @commute_add_imm_fabs_f32`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, \|[[X]]\|`
			`; SI-NEXT: buffer_store_dword [[REG]]`
R600/SI: Allow commuting with source modifiers llvm-svn: 220066 2014-10-18 02:00:48 +08:00			`define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%x = load float addrspace(1)* %gep.0`
			`%x.fabs = call float @llvm.fabs.f32(float %x) #1`
			`%z = fadd float 2.0, %x.fabs`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, \|[[X]]\|`
			`; SI-NEXT: buffer_store_dword [[REG]]`
R600/SI: Allow commuting with source modifiers llvm-svn: 220066 2014-10-18 02:00:48 +08:00			`define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%x = load float addrspace(1)* %gep.0`
			`%x.fabs = call float @llvm.fabs.f32(float %x) #1`
			`%x.fneg.fabs = fsub float -0.000000e+00, %x.fabs`
			`%z = fmul float 4.0, %x.fneg.fabs`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: @commute_mul_imm_fneg_f32`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]`
			`; SI-NEXT: buffer_store_dword [[REG]]`
R600/SI: Allow commuting with source modifiers llvm-svn: 220066 2014-10-18 02:00:48 +08:00			`define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%x = load float addrspace(1)* %gep.0`
			`%x.fneg = fsub float -0.000000e+00, %x`
			`%z = fmul float 4.0, %x.fneg`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

R600/SI: Allow commuting some 3 op instructions e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c This simplifies matching v_madmk_f32. This looks somewhat surprising, but it appears to be OK to do this. We can commute src0 and src1 in all of these instructions, and that's all that appears to matter. llvm-svn: 221910 2014-11-14 03:26:47 +08:00			`; FIXME: Should use SGPR for literal.`
			`; FUNC-LABEL: @commute_add_lit_fabs_f32`
			`; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
			`; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000`
			`; SI: v_add_f32_e64 [[REG:v[0-9]+]], \|[[X]]\|, [[K]]`
			`; SI-NEXT: buffer_store_dword [[REG]]`
			`define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%x = load float addrspace(1)* %gep.0`
			`%x.fabs = call float @llvm.fabs.f32(float %x) #1`
			`%z = fadd float 1024.0, %x.fabs`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: @commute_add_fabs_f32`
			`; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
R600/SI: Change mubuf offsets to print as decimal This matches SC's behavior. llvm-svn: 223194 2014-12-03 11:12:13 +08:00			`; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4`
R600/SI: Allow commuting some 3 op instructions e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c This simplifies matching v_madmk_f32. This looks somewhat surprising, but it appears to be OK to do this. We can commute src0 and src1 in all of these instructions, and that's all that appears to matter. llvm-svn: 221910 2014-11-14 03:26:47 +08:00			`; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], \|[[Y]]\|`
			`; SI-NEXT: buffer_store_dword [[REG]]`
			`define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%x = load float addrspace(1)* %gep.0`
			`%y = load float addrspace(1)* %gep.1`
			`%y.fabs = call float @llvm.fabs.f32(float %y) #1`
			`%z = fadd float %x, %y.fabs`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: @commute_mul_fneg_f32`
			`; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
R600/SI: Change mubuf offsets to print as decimal This matches SC's behavior. llvm-svn: 223194 2014-12-03 11:12:13 +08:00			`; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4`
R600/SI: Allow commuting some 3 op instructions e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c This simplifies matching v_madmk_f32. This looks somewhat surprising, but it appears to be OK to do this. We can commute src0 and src1 in all of these instructions, and that's all that appears to matter. llvm-svn: 221910 2014-11-14 03:26:47 +08:00			`; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]`
			`; SI-NEXT: buffer_store_dword [[REG]]`
			`define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%x = load float addrspace(1)* %gep.0`
			`%y = load float addrspace(1)* %gep.1`
			`%y.fneg = fsub float -0.000000e+00, %y`
			`%z = fmul float %x, %y.fneg`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: @commute_mul_fabs_fneg_f32`
			`; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
R600/SI: Change mubuf offsets to print as decimal This matches SC's behavior. llvm-svn: 223194 2014-12-03 11:12:13 +08:00			`; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4`
R600/SI: Allow commuting some 3 op instructions e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c This simplifies matching v_madmk_f32. This looks somewhat surprising, but it appears to be OK to do this. We can commute src0 and src1 in all of these instructions, and that's all that appears to matter. llvm-svn: 221910 2014-11-14 03:26:47 +08:00			`; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -\|[[Y]]\|`
			`; SI-NEXT: buffer_store_dword [[REG]]`
			`define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%x = load float addrspace(1)* %gep.0`
			`%y = load float addrspace(1)* %gep.1`
			`%y.fabs = call float @llvm.fabs.f32(float %y) #1`
			`%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs`
			`%z = fmul float %x, %y.fabs.fneg`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

			`; There's no reason to commute this.`
			`; FUNC-LABEL: @commute_mul_fabs_x_fabs_y_f32`
			`; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
R600/SI: Change mubuf offsets to print as decimal This matches SC's behavior. llvm-svn: 223194 2014-12-03 11:12:13 +08:00			`; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4`
R600/SI: Allow commuting some 3 op instructions e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c This simplifies matching v_madmk_f32. This looks somewhat surprising, but it appears to be OK to do this. We can commute src0 and src1 in all of these instructions, and that's all that appears to matter. llvm-svn: 221910 2014-11-14 03:26:47 +08:00			`; SI: v_mul_f32_e64 [[REG:v[0-9]+]], \|[[X]]\|, \|[[Y]]\|`
			`; SI-NEXT: buffer_store_dword [[REG]]`
			`define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%x = load float addrspace(1)* %gep.0`
			`%y = load float addrspace(1)* %gep.1`
			`%x.fabs = call float @llvm.fabs.f32(float %x) #1`
			`%y.fabs = call float @llvm.fabs.f32(float %y) #1`
			`%z = fmul float %x.fabs, %y.fabs`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: @commute_mul_fabs_x_fneg_fabs_y_f32`
			`; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
R600/SI: Change mubuf offsets to print as decimal This matches SC's behavior. llvm-svn: 223194 2014-12-03 11:12:13 +08:00			`; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4`
R600/SI: Allow commuting some 3 op instructions e.g. v_mad_f32 a, b, c -> v_mad_f32 b, a, c This simplifies matching v_madmk_f32. This looks somewhat surprising, but it appears to be OK to do this. We can commute src0 and src1 in all of these instructions, and that's all that appears to matter. llvm-svn: 221910 2014-11-14 03:26:47 +08:00			`; SI: v_mul_f32_e64 [[REG:v[0-9]+]], \|[[X]]\|, -\|[[Y]]\|`
			`; SI-NEXT: buffer_store_dword [[REG]]`
			`define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`%tid = call i32 @llvm.r600.read.tidig.x() #1`
			`%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%x = load float addrspace(1)* %gep.0`
			`%y = load float addrspace(1)* %gep.1`
			`%x.fabs = call float @llvm.fabs.f32(float %x) #1`
			`%y.fabs = call float @llvm.fabs.f32(float %y) #1`
			`%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs`
			`%z = fmul float %x.fabs, %y.fabs.fneg`
			`store float %z, float addrspace(1)* %out`
			`ret void`
			`}`

R600/SI: Allow commuting with src2_modifiers llvm-svn: 221911 2014-11-14 03:26:50 +08:00			`; Make sure we commute the multiply part for the constant in src0 even`
			`; though we have negate modifier on src2.`

			`; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32`
			`; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}`
R600/SI: Change mubuf offsets to print as decimal This matches SC's behavior. llvm-svn: 223194 2014-12-03 11:12:13 +08:00			`; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4`
R600/SI: Allow commuting with src2_modifiers llvm-svn: 221911 2014-11-14 03:26:50 +08:00			`; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], \|[[R2]]\|`
			`; SI: buffer_store_dword [[RESULT]]`
			`define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {`
			`%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone`
			`%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid`
			`%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1`
			`%gep.out = getelementptr float addrspace(1)* %out, i32 %tid`

			`%r1 = load float addrspace(1)* %gep.0`
			`%r2 = load float addrspace(1)* %gep.1`

			`%r2.fabs = call float @llvm.fabs.f32(float %r2)`

			`%r3 = tail call float @llvm.fma.f32(float %r1, float 2.0, float %r2.fabs)`
			`store float %r3, float addrspace(1)* %gep.out`
			`ret void`
			`}`

R600/SI: Allow commuting with source modifiers llvm-svn: 220066 2014-10-18 02:00:48 +08:00			`attributes #0 = { nounwind }`
			`attributes #1 = { nounwind readnone }`