; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -o - -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -o - -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; Shift left, by a splat constant of 5, the zero-extended result of a vector
; integer equality compare. For both check prefixes the expected code is a
; packed compare followed by a single pand with a RIP-relative memory operand;
; no separate shift instruction is expected.
define <8 x i16> @foo(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: foo:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: foo:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %icmp = icmp eq <8 x i16> %a, %b
  %zext = zext <8 x i1> %icmp to <8 x i16>
  %shl = shl nuw nsw <8 x i16> %zext, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %shl
}

; Don't fail with an assert due to an undef in the buildvector
; Same IR as @foo except that lane 1 of the shift-amount vector is undef.
; The expected code is the same compare + pand sequence as for @foo.
define <8 x i16> @bar(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: bar:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: bar:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %icmp = icmp eq <8 x i16> %a, %b
  %zext = zext <8 x i1> %icmp to <8 x i16>
  %shl = shl nuw nsw <8 x i16> %zext, <i16 5, i16 undef, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %shl
}