Generate AVX/AVX2 shuffles even when there is a memory op somewhere else in the program.

Starting with r155461, we are able to select patterns for vbroadcast even when the load op has other users.

Fix PR11900.

llvm-svn: 156539
This commit is contained in:
Nadav Rotem 2012-05-10 12:22:05 +00:00
parent 66e6de10cf
commit b86a3fb8d0
2 changed files with 17 additions and 4 deletions

View File

@ -5029,10 +5029,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
if (!ISD::isNormalLoad(Ld.getNode())) if (!ISD::isNormalLoad(Ld.getNode()))
return SDValue(); return SDValue();
// Reject loads that have uses of the chain result
if (Ld->hasAnyUseOfValue(1))
return SDValue();
unsigned ScalarSize = Ld.getValueType().getSizeInBits(); unsigned ScalarSize = Ld.getValueType().getSizeInBits();
if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) if (ScalarSize == 32 || (Is256 && ScalarSize == 64))

View File

@ -112,3 +112,20 @@ entry:
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
ret <2 x double> %vecinit2.i ret <2 x double> %vecinit2.i
} }
; Regression test for PR11900: a scalar float load that is splatted into all
; four lanes of a <4 x float> should be selected as a vbroadcastss with a
; memory operand, even though the function contains other memory operations.
; The directives below look for the function label, the broadcast from memory,
; and the return.
; CHECK: _RR
; CHECK: vbroadcastss (%
; CHECK: ret
; NOTE(review): the function is marked readnone yet contains loads and a
; store -- confirm this attribute is intentional for the test.
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
entry:
; Scalar load whose value is replicated into every vector lane below.
%q = load float* %ptr, align 4
; Build the splat by inserting %q into lanes 0 through 3 in turn.
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
; Force a chain: an unrelated load/store pair adds further memory operations
; to the function (presumably so that the chain result of the %q load has a
; use during instruction selection -- TODO confirm).
%j = load i32* %k, align 4
store i32 %j, i32* undef
ret <4 x float> %vecinit6.i
}