forked from OSchip/llvm-project
Generate AVX/AVX2 shuffles even when there is a memory op somewhere else in the program.
Starting with r155461, we are able to select patterns for vbroadcast even when the load op has other users. Fixes PR11900. llvm-svn: 156539
This commit is contained in:
parent
66e6de10cf
commit
b86a3fb8d0
|
@ -5029,10 +5029,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
|
||||||
if (!ISD::isNormalLoad(Ld.getNode()))
|
if (!ISD::isNormalLoad(Ld.getNode()))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Reject loads that have uses of the chain result
|
|
||||||
if (Ld->hasAnyUseOfValue(1))
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
|
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
|
||||||
|
|
||||||
if (ScalarSize == 32 || (Is256 && ScalarSize == 64))
|
if (ScalarSize == 32 || (Is256 && ScalarSize == 64))
|
||||||
|
|
|
@ -112,3 +112,20 @@ entry:
|
||||||
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
|
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
|
||||||
ret <2 x double> %vecinit2.i
|
ret <2 x double> %vecinit2.i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: _RR
|
||||||
|
; CHECK: vbroadcastss (%
|
||||||
|
; CHECK: ret
|
||||||
|
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
|
||||||
|
entry:
|
||||||
|
%q = load float* %ptr, align 4
|
||||||
|
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
|
||||||
|
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
|
||||||
|
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
|
||||||
|
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
|
||||||
|
; force a chain
|
||||||
|
%j = load i32* %k, align 4
|
||||||
|
store i32 %j, i32* undef
|
||||||
|
ret <4 x float> %vecinit6.i
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue