forked from OSchip/llvm-project
[RISCV] Fix the neutral element in vector 'fadd' reductions
Using positive zero as the neutral element in 'fadd' reductions is incorrect, even though it generates better code. The correct neutral element is negative zero: 0.0 + -0.0 = 0.0, whereas -0.0 + -0.0 = -0.0, so a reduction seeded with positive zero returns 0.0 for inputs whose true sum is -0.0. There may be better lowerings of negative zero that avoid constant-pool loads; these are left as future work. Reviewed By: craig.topper. Differential Revision: https://reviews.llvm.org/D105902
This commit is contained in:
parent
4359b870b1
commit
03a4702c88
|
@ -3718,7 +3718,7 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
|
|||
llvm_unreachable("Unhandled reduction");
|
||||
case ISD::VECREDUCE_FADD:
|
||||
return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
|
||||
DAG.getConstantFP(0.0, DL, EltVT));
|
||||
DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
|
||||
Op.getOperand(0));
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
|
||||
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
|
||||
|
||||
declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
|
||||
|
||||
|
@ -38,10 +38,12 @@ declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
|
|||
define half @vreduce_fadd_v2f16(<2 x half>* %x, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v2f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v26, 0
|
||||
; CHECK-NEXT: vfmv.v.f v26, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v25, v26
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -73,10 +75,12 @@ declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
|
|||
define half @vreduce_fadd_v4f16(<4 x half>* %x, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v4f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v26, 0
|
||||
; CHECK-NEXT: vfmv.v.f v26, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v25, v26
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -108,10 +112,12 @@ declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
|
|||
define half @vreduce_fadd_v8f16(<8 x half>* %x, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v26, 0
|
||||
; CHECK-NEXT: vfmv.v.f v26, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v25, v26
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -143,10 +149,12 @@ declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
|
|||
define half @vreduce_fadd_v16f16(<16 x half>* %x, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v16f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
|
||||
; CHECK-NEXT: vle16.v v26, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v26, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -176,18 +184,35 @@ define half @vreduce_ord_fadd_v16f16(<16 x half>* %x, half %s) {
|
|||
declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
|
||||
|
||||
define half @vreduce_fadd_v32f16(<32 x half>* %x, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v32f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a1, zero, 32
|
||||
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
|
||||
; CHECK-NEXT: vle16.v v28, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v28, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
; CHECK-NEXT: fadd.h fa0, fa0, ft0
|
||||
; CHECK-NEXT: ret
|
||||
; RV32-LABEL: vreduce_fadd_v32f16:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi a1, zero, 32
|
||||
; RV32-NEXT: lui a2, %hi(.LCPI10_0)
|
||||
; RV32-NEXT: flh ft0, %lo(.LCPI10_0)(a2)
|
||||
; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu
|
||||
; RV32-NEXT: vle16.v v28, (a0)
|
||||
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; RV32-NEXT: vfmv.v.f v25, ft0
|
||||
; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu
|
||||
; RV32-NEXT: vfredsum.vs v25, v28, v25
|
||||
; RV32-NEXT: vfmv.f.s ft0, v25
|
||||
; RV32-NEXT: fadd.h fa0, fa0, ft0
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vreduce_fadd_v32f16:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: lui a1, %hi(.LCPI10_0)
|
||||
; RV64-NEXT: flh ft0, %lo(.LCPI10_0)(a1)
|
||||
; RV64-NEXT: addi a1, zero, 32
|
||||
; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu
|
||||
; RV64-NEXT: vle16.v v28, (a0)
|
||||
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; RV64-NEXT: vfmv.v.f v25, ft0
|
||||
; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu
|
||||
; RV64-NEXT: vfredsum.vs v25, v28, v25
|
||||
; RV64-NEXT: vfmv.f.s ft0, v25
|
||||
; RV64-NEXT: fadd.h fa0, fa0, ft0
|
||||
; RV64-NEXT: ret
|
||||
%v = load <32 x half>, <32 x half>* %x
|
||||
%red = call reassoc half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
|
||||
ret half %red
|
||||
|
@ -213,18 +238,35 @@ define half @vreduce_ord_fadd_v32f16(<32 x half>* %x, half %s) {
|
|||
declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
|
||||
|
||||
define half @vreduce_fadd_v64f16(<64 x half>* %x, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v64f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a1, zero, 64
|
||||
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
|
||||
; CHECK-NEXT: vle16.v v8, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
; CHECK-NEXT: fadd.h fa0, fa0, ft0
|
||||
; CHECK-NEXT: ret
|
||||
; RV32-LABEL: vreduce_fadd_v64f16:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi a1, zero, 64
|
||||
; RV32-NEXT: lui a2, %hi(.LCPI12_0)
|
||||
; RV32-NEXT: flh ft0, %lo(.LCPI12_0)(a2)
|
||||
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu
|
||||
; RV32-NEXT: vle16.v v8, (a0)
|
||||
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; RV32-NEXT: vfmv.v.f v25, ft0
|
||||
; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu
|
||||
; RV32-NEXT: vfredsum.vs v25, v8, v25
|
||||
; RV32-NEXT: vfmv.f.s ft0, v25
|
||||
; RV32-NEXT: fadd.h fa0, fa0, ft0
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vreduce_fadd_v64f16:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: lui a1, %hi(.LCPI12_0)
|
||||
; RV64-NEXT: flh ft0, %lo(.LCPI12_0)(a1)
|
||||
; RV64-NEXT: addi a1, zero, 64
|
||||
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu
|
||||
; RV64-NEXT: vle16.v v8, (a0)
|
||||
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; RV64-NEXT: vfmv.v.f v25, ft0
|
||||
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu
|
||||
; RV64-NEXT: vfredsum.vs v25, v8, v25
|
||||
; RV64-NEXT: vfmv.f.s ft0, v25
|
||||
; RV64-NEXT: fadd.h fa0, fa0, ft0
|
||||
; RV64-NEXT: ret
|
||||
%v = load <64 x half>, <64 x half>* %x
|
||||
%red = call reassoc half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v)
|
||||
ret half %red
|
||||
|
@ -257,9 +299,11 @@ define half @vreduce_fadd_v128f16(<128 x half>* %x, half %s) {
|
|||
; CHECK-NEXT: vle16.v v8, (a0)
|
||||
; CHECK-NEXT: addi a0, a0, 128
|
||||
; CHECK-NEXT: vle16.v v16, (a0)
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI14_0)(a0)
|
||||
; CHECK-NEXT: vfadd.vv v8, v8, v16
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -330,10 +374,12 @@ declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
|
|||
define float @vreduce_fadd_v2f32(<2 x float>* %x, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v2f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v26, 0
|
||||
; CHECK-NEXT: vfmv.v.f v26, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v25, v26
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -365,10 +411,12 @@ declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
|
|||
define float @vreduce_fadd_v4f32(<4 x float>* %x, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v26, 0
|
||||
; CHECK-NEXT: vfmv.v.f v26, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v25, v26
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -400,10 +448,12 @@ declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
|
|||
define float @vreduce_fadd_v8f32(<8 x float>* %x, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v8f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI22_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
|
||||
; CHECK-NEXT: vle32.v v26, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v26, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -435,10 +485,12 @@ declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
|
|||
define float @vreduce_fadd_v16f32(<16 x float>* %x, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v16f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI24_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
|
||||
; CHECK-NEXT: vle32.v v28, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v28, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -468,18 +520,35 @@ define float @vreduce_ord_fadd_v16f32(<16 x float>* %x, float %s) {
|
|||
declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)
|
||||
|
||||
define float @vreduce_fadd_v32f32(<32 x float>* %x, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v32f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a1, zero, 32
|
||||
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
|
||||
; CHECK-NEXT: vle32.v v8, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
; CHECK-NEXT: fadd.s fa0, fa0, ft0
|
||||
; CHECK-NEXT: ret
|
||||
; RV32-LABEL: vreduce_fadd_v32f32:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi a1, zero, 32
|
||||
; RV32-NEXT: lui a2, %hi(.LCPI26_0)
|
||||
; RV32-NEXT: flw ft0, %lo(.LCPI26_0)(a2)
|
||||
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
|
||||
; RV32-NEXT: vle32.v v8, (a0)
|
||||
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; RV32-NEXT: vfmv.v.f v25, ft0
|
||||
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
|
||||
; RV32-NEXT: vfredsum.vs v25, v8, v25
|
||||
; RV32-NEXT: vfmv.f.s ft0, v25
|
||||
; RV32-NEXT: fadd.s fa0, fa0, ft0
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vreduce_fadd_v32f32:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: lui a1, %hi(.LCPI26_0)
|
||||
; RV64-NEXT: flw ft0, %lo(.LCPI26_0)(a1)
|
||||
; RV64-NEXT: addi a1, zero, 32
|
||||
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
|
||||
; RV64-NEXT: vle32.v v8, (a0)
|
||||
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; RV64-NEXT: vfmv.v.f v25, ft0
|
||||
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
|
||||
; RV64-NEXT: vfredsum.vs v25, v8, v25
|
||||
; RV64-NEXT: vfmv.f.s ft0, v25
|
||||
; RV64-NEXT: fadd.s fa0, fa0, ft0
|
||||
; RV64-NEXT: ret
|
||||
%v = load <32 x float>, <32 x float>* %x
|
||||
%red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v)
|
||||
ret float %red
|
||||
|
@ -512,9 +581,11 @@ define float @vreduce_fadd_v64f32(<64 x float>* %x, float %s) {
|
|||
; CHECK-NEXT: vle32.v v8, (a0)
|
||||
; CHECK-NEXT: addi a0, a0, 128
|
||||
; CHECK-NEXT: vle32.v v16, (a0)
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
|
||||
; CHECK-NEXT: vfadd.vv v8, v8, v16
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -585,10 +656,12 @@ declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
|
|||
define double @vreduce_fadd_v2f64(<2 x double>* %x, double %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI32_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vle64.v v25, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v26, 0
|
||||
; CHECK-NEXT: vfmv.v.f v26, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v25, v26
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -620,10 +693,12 @@ declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
|
|||
define double @vreduce_fadd_v4f64(<4 x double>* %x, double %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI34_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
|
||||
; CHECK-NEXT: vle64.v v26, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v26, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -655,10 +730,12 @@ declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
|
|||
define double @vreduce_fadd_v8f64(<8 x double>* %x, double %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v8f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI36_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
|
||||
; CHECK-NEXT: vle64.v v28, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v28, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -690,10 +767,12 @@ declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
|
|||
define double @vreduce_fadd_v16f64(<16 x double>* %x, double %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_v16f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI38_0)(a1)
|
||||
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; CHECK-NEXT: vle64.v v8, (a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -729,9 +808,11 @@ define double @vreduce_fadd_v32f64(<32 x double>* %x, double %s) {
|
|||
; CHECK-NEXT: vle64.v v8, (a0)
|
||||
; CHECK-NEXT: addi a0, a0, 128
|
||||
; CHECK-NEXT: vle64.v v16, (a0)
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI40_0)(a0)
|
||||
; CHECK-NEXT: vfadd.vv v8, v8, v16
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
|
|
@ -9,8 +9,10 @@ declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
|
|||
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv1f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -38,8 +40,10 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
|
|||
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv2f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -67,8 +71,10 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
|
|||
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv4f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
; CHECK-NEXT: fadd.h fa0, fa0, ft0
|
||||
|
@ -94,8 +100,10 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
|
|||
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv1f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -123,8 +131,10 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
|
|||
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv2f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
; CHECK-NEXT: fadd.s fa0, fa0, ft0
|
||||
|
@ -150,8 +160,10 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
|
|||
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -179,8 +191,10 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
|
|||
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv1f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
; CHECK-NEXT: fadd.d fa0, fa0, ft0
|
||||
|
@ -206,8 +220,10 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
|
|||
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
@ -235,8 +251,10 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
|
|||
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
|
||||
; CHECK-LABEL: vreduce_fadd_nxv4f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI16_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vmv.v.i v25, 0
|
||||
; CHECK-NEXT: vfmv.v.f v25, ft0
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
|
||||
; CHECK-NEXT: vfredsum.vs v25, v8, v25
|
||||
; CHECK-NEXT: vfmv.f.s ft0, v25
|
||||
|
|
Loading…
Reference in New Issue