forked from OSchip/llvm-project
[RISCV] Match vwmulsu_vx with scalar splat input.
This is a more generic version of D119110 that uses MaskedValueIsZero to do the matching and SimplifyDemandedBits to remove any unneeded AND instructions. Tests were taken from D119110.

Reviewed By: Chenbing.Zheng

Differential Revision: https://reviews.llvm.org/D119622
This commit is contained in:
parent
d132b47bb9
commit
ab6e02dded
|
@ -7786,12 +7786,15 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
|
|||
if (ScalarBits < EltBits)
|
||||
return SDValue();
|
||||
|
||||
if (IsSignExt) {
|
||||
if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
|
||||
return SDValue();
|
||||
// If the LHS is a sign extend, try to use vwmul.
|
||||
if (IsSignExt && DAG.ComputeNumSignBits(Op1) > (ScalarBits - NarrowSize)) {
|
||||
// Can use vwmul.
|
||||
} else {
|
||||
// Otherwise try to use vwmulu or vwmulsu.
|
||||
APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
|
||||
if (!DAG.MaskedValueIsZero(Op1, Mask))
|
||||
if (DAG.MaskedValueIsZero(Op1, Mask))
|
||||
IsVWMULSU = IsSignExt;
|
||||
else
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -8438,6 +8441,16 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return Gather;
|
||||
break;
|
||||
}
|
||||
case RISCVISD::VMV_V_X_VL: {
|
||||
// VMV.V.X only demands the vector element bitwidth from the scalar input.
|
||||
unsigned ScalarSize = N->getOperand(0).getValueSizeInBits();
|
||||
unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
|
||||
if (ScalarSize > EltWidth)
|
||||
if (SimplifyDemandedLowBitsHelper(0, EltWidth))
|
||||
return SDValue(N, 0);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
|
||||
|
||||
define <2 x i16> @vwmulsu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
|
||||
; CHECK-LABEL: vwmulsu_v2i16:
|
||||
|
@ -681,3 +681,247 @@ define <16 x i64> @vwmulsu_vx_v16i64(<16 x i32>* %x, i32 %y) {
|
|||
%f = mul <16 x i64> %d, %e
|
||||
ret <16 x i64> %f
|
||||
}
|
||||
|
||||
define <8 x i16> @vwmulsu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
|
||||
; CHECK-NEXT: vle8.v v9, (a0)
|
||||
; CHECK-NEXT: lbu a0, 0(a1)
|
||||
; CHECK-NEXT: vwmulsu.vx v8, v9, a0
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x i8>, <8 x i8>* %x
|
||||
%b = load i8, i8* %y
|
||||
%c = zext i8 %b to i16
|
||||
%d = insertelement <8 x i16> poison, i16 %c, i32 0
|
||||
%e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
|
||||
%f = sext <8 x i8> %a to <8 x i16>
|
||||
%g = mul <8 x i16> %e, %f
|
||||
ret <8 x i16> %g
|
||||
}
|
||||
|
||||
define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(<8 x i8>* %x, i8* %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
|
||||
; CHECK-NEXT: vle8.v v8, (a0)
|
||||
; CHECK-NEXT: lb a0, 0(a1)
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vzext.vf2 v9, v8
|
||||
; CHECK-NEXT: vmul.vx v8, v9, a0
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x i8>, <8 x i8>* %x
|
||||
%b = load i8, i8* %y
|
||||
%c = sext i8 %b to i16
|
||||
%d = insertelement <8 x i16> poison, i16 %c, i32 0
|
||||
%e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
|
||||
%f = zext <8 x i8> %a to <8 x i16>
|
||||
%g = mul <8 x i16> %e, %f
|
||||
ret <8 x i16> %g
|
||||
}
|
||||
|
||||
define <4 x i32> @vwmulsu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vle16.v v9, (a0)
|
||||
; CHECK-NEXT: lbu a0, 0(a1)
|
||||
; CHECK-NEXT: vwmul.vx v8, v9, a0
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x i16>, <4 x i16>* %x
|
||||
%b = load i8, i8* %y
|
||||
%c = zext i8 %b to i32
|
||||
%d = insertelement <4 x i32> poison, i32 %c, i32 0
|
||||
%e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
%f = sext <4 x i16> %a to <4 x i32>
|
||||
%g = mul <4 x i32> %e, %f
|
||||
ret <4 x i32> %g
|
||||
}
|
||||
|
||||
define <4 x i32> @vwmulsu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vle16.v v9, (a0)
|
||||
; CHECK-NEXT: lhu a0, 0(a1)
|
||||
; CHECK-NEXT: vwmulsu.vx v8, v9, a0
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x i16>, <4 x i16>* %x
|
||||
%b = load i16, i16* %y
|
||||
%c = zext i16 %b to i32
|
||||
%d = insertelement <4 x i32> poison, i32 %c, i32 0
|
||||
%e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
%f = sext <4 x i16> %a to <4 x i32>
|
||||
%g = mul <4 x i32> %e, %f
|
||||
ret <4 x i32> %g
|
||||
}
|
||||
|
||||
define <2 x i64> @vwmulsu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) {
|
||||
; RV32-LABEL: vwmulsu_vx_v2i64_i8:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: .cfi_def_cfa_offset 16
|
||||
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; RV32-NEXT: lbu a1, 0(a1)
|
||||
; RV32-NEXT: vle32.v v8, (a0)
|
||||
; RV32-NEXT: sw zero, 12(sp)
|
||||
; RV32-NEXT: sw a1, 8(sp)
|
||||
; RV32-NEXT: addi a0, sp, 8
|
||||
; RV32-NEXT: vlse64.v v9, (a0), zero
|
||||
; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
|
||||
; RV32-NEXT: vsext.vf2 v10, v8
|
||||
; RV32-NEXT: vmul.vv v8, v9, v10
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vwmulsu_vx_v2i64_i8:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; RV64-NEXT: vle32.v v9, (a0)
|
||||
; RV64-NEXT: lbu a0, 0(a1)
|
||||
; RV64-NEXT: vwmul.vx v8, v9, a0
|
||||
; RV64-NEXT: ret
|
||||
%a = load <2 x i32>, <2 x i32>* %x
|
||||
%b = load i8, i8* %y
|
||||
%c = zext i8 %b to i64
|
||||
%d = insertelement <2 x i64> poison, i64 %c, i64 0
|
||||
%e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
|
||||
%f = sext <2 x i32> %a to <2 x i64>
|
||||
%g = mul <2 x i64> %e, %f
|
||||
ret <2 x i64> %g
|
||||
}
|
||||
|
||||
define <2 x i64> @vwmulsu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) {
|
||||
; RV32-LABEL: vwmulsu_vx_v2i64_i16:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: .cfi_def_cfa_offset 16
|
||||
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; RV32-NEXT: lhu a1, 0(a1)
|
||||
; RV32-NEXT: vle32.v v8, (a0)
|
||||
; RV32-NEXT: sw zero, 12(sp)
|
||||
; RV32-NEXT: sw a1, 8(sp)
|
||||
; RV32-NEXT: addi a0, sp, 8
|
||||
; RV32-NEXT: vlse64.v v9, (a0), zero
|
||||
; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
|
||||
; RV32-NEXT: vsext.vf2 v10, v8
|
||||
; RV32-NEXT: vmul.vv v8, v9, v10
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vwmulsu_vx_v2i64_i16:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; RV64-NEXT: vle32.v v9, (a0)
|
||||
; RV64-NEXT: lhu a0, 0(a1)
|
||||
; RV64-NEXT: vwmul.vx v8, v9, a0
|
||||
; RV64-NEXT: ret
|
||||
%a = load <2 x i32>, <2 x i32>* %x
|
||||
%b = load i16, i16* %y
|
||||
%c = zext i16 %b to i64
|
||||
%d = insertelement <2 x i64> poison, i64 %c, i64 0
|
||||
%e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
|
||||
%f = sext <2 x i32> %a to <2 x i64>
|
||||
%g = mul <2 x i64> %e, %f
|
||||
ret <2 x i64> %g
|
||||
}
|
||||
|
||||
define <2 x i64> @vwmulsu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) {
|
||||
; RV32-LABEL: vwmulsu_vx_v2i64_i32:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: .cfi_def_cfa_offset 16
|
||||
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; RV32-NEXT: lw a1, 0(a1)
|
||||
; RV32-NEXT: vle32.v v8, (a0)
|
||||
; RV32-NEXT: sw zero, 12(sp)
|
||||
; RV32-NEXT: sw a1, 8(sp)
|
||||
; RV32-NEXT: addi a0, sp, 8
|
||||
; RV32-NEXT: vlse64.v v9, (a0), zero
|
||||
; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
|
||||
; RV32-NEXT: vsext.vf2 v10, v8
|
||||
; RV32-NEXT: vmul.vv v8, v9, v10
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vwmulsu_vx_v2i64_i32:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
|
||||
; RV64-NEXT: vle32.v v9, (a0)
|
||||
; RV64-NEXT: lwu a0, 0(a1)
|
||||
; RV64-NEXT: vwmulsu.vx v8, v9, a0
|
||||
; RV64-NEXT: ret
|
||||
%a = load <2 x i32>, <2 x i32>* %x
|
||||
%b = load i32, i32* %y
|
||||
%c = zext i32 %b to i64
|
||||
%d = insertelement <2 x i64> poison, i64 %c, i64 0
|
||||
%e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
|
||||
%f = sext <2 x i32> %a to <2 x i64>
|
||||
%g = mul <2 x i64> %e, %f
|
||||
ret <2 x i64> %g
|
||||
}
|
||||
|
||||
define <8 x i16> @vwmulsu_vx_v8i16_i8_and(<8 x i8>* %x, i16 %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
|
||||
; CHECK-NEXT: vle8.v v9, (a0)
|
||||
; CHECK-NEXT: vwmulsu.vx v8, v9, a1
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x i8>, <8 x i8>* %x
|
||||
%b = and i16 %y, 255
|
||||
%c = insertelement <8 x i16> poison, i16 %b, i32 0
|
||||
%d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
|
||||
%e = sext <8 x i8> %a to <8 x i16>
|
||||
%f = mul <8 x i16> %d, %e
|
||||
ret <8 x i16> %f
|
||||
}
|
||||
|
||||
define <8 x i16> @vwmulsu_vx_v8i16_i8_and1(<8 x i8>* %x, i16 %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
|
||||
; CHECK-NEXT: vle8.v v9, (a0)
|
||||
; CHECK-NEXT: andi a0, a1, 254
|
||||
; CHECK-NEXT: vwmulsu.vx v8, v9, a0
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x i8>, <8 x i8>* %x
|
||||
%b = and i16 %y, 254
|
||||
%c = insertelement <8 x i16> poison, i16 %b, i32 0
|
||||
%d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
|
||||
%e = sext <8 x i8> %a to <8 x i16>
|
||||
%f = mul <8 x i16> %d, %e
|
||||
ret <8 x i16> %f
|
||||
}
|
||||
|
||||
define <4 x i32> @vwmulsu_vx_v4i32_i16_and(<4 x i16>* %x, i32 %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_and:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vle16.v v9, (a0)
|
||||
; CHECK-NEXT: vwmulsu.vx v8, v9, a1
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x i16>, <4 x i16>* %x
|
||||
%b = and i32 %y, 65535
|
||||
%c = insertelement <4 x i32> poison, i32 %b, i32 0
|
||||
%d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
%e = sext <4 x i16> %a to <4 x i32>
|
||||
%f = mul <4 x i32> %d, %e
|
||||
ret <4 x i32> %f
|
||||
}
|
||||
|
||||
define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(<4 x i16>* %x, i16 %y) {
|
||||
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_zext:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vle16.v v9, (a0)
|
||||
; CHECK-NEXT: vwmulsu.vx v8, v9, a1
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x i16>, <4 x i16>* %x
|
||||
%b = zext i16 %y to i32
|
||||
%c = insertelement <4 x i32> poison, i32 %b, i32 0
|
||||
%d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
%e = sext <4 x i16> %a to <4 x i32>
|
||||
%f = mul <4 x i32> %d, %e
|
||||
ret <4 x i32> %f
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue