forked from OSchip/llvm-project
[X86] LowerINSERT_VECTOR_ELT - always lower v32i8/v16i16 allones insertions on AVX1 as OR ops
v32i8/v16i16 blend shuffles on AVX1 will expand to OR(AND,ANDN) patterns which can be easily broken by other combines
This commit is contained in:
parent
d3292c4ba0
commit
8254966062
|
@ -19789,9 +19789,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
|||
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
|
||||
|
||||
if (IsZeroElt || IsAllOnesElt) {
|
||||
// Lower insertion of i8 -1 as an 'OR' blend.
|
||||
// Lower insertion of v16i8/v32i8/v16i16 -1 elts as an 'OR' blend.
|
||||
// We don't deal with i8 0 since it appears to be handled elsewhere.
|
||||
if (IsAllOnesElt && EltSizeInBits == 8 && !Subtarget.hasSSE41()) {
|
||||
if (IsAllOnesElt &&
|
||||
((VT == MVT::v16i8 && !Subtarget.hasSSE41()) ||
|
||||
((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) {
|
||||
SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());
|
||||
SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());
|
||||
SmallVector<SDValue, 8> CstVectorElts(NumElts, ZeroCst);
|
||||
|
|
|
@ -280,7 +280,8 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
|
|||
;
|
||||
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [65535,0,0,0]
|
||||
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
|
@ -384,13 +385,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
|
|||
;
|
||||
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movl $255, %eax
|
||||
; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,0,0]
|
||||
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
|
||||
|
|
Loading…
Reference in New Issue