[X86][SSE] Use LLVM min/max intrinsics instead of (deprecated) SSE intrinsics. NFCI.

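The deprecated SSE min/max intrinsics (e.g. llvm.x86.sse2.pmins.w, llvm.x86.sse41.pmaxsd, llvm.x86.sse41.pminud) are now auto-upgraded to the equivalent generic LLVM intrinsics (llvm.smin, llvm.smax, llvm.umin), so these tests can use the generic forms directly.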
This commit is contained in:
Simon Pilgrim 2021-02-20 12:17:46 +00:00
parent a274062bd4
commit ee0dee7d38
3 changed files with 7 additions and 7 deletions

View File

@ -5,7 +5,7 @@ define void @test() {
; CHECK-LABEL: test: ; CHECK-LABEL: test:
; CHECK: # %bb.0: # %bb2 ; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%tmp1 = call <8 x i16> @llvm.x86.sse2.pmins.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 7, i32 7, i32 7, i32 7 > to <8 x i16>) ) %tmp1 = call <8 x i16> @llvm.smin.v8i16( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 7, i32 7, i32 7, i32 7 > to <8 x i16>) )
%tmp2 = bitcast <8 x i16> %tmp1 to <4 x i32> %tmp2 = bitcast <8 x i16> %tmp1 to <4 x i32>
br i1 false, label %bb1, label %bb2 br i1 false, label %bb1, label %bb2
@ -17,4 +17,4 @@ bb1:
ret void ret void
} }
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)

View File

@ -10,11 +10,11 @@ define void @test(<4 x i32>* nocapture %p) nounwind {
; CHECK-NEXT: vmovdqu %xmm0, (%rdi) ; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a = load <4 x i32>, <4 x i32>* %p, align 1 %a = load <4 x i32>, <4 x i32>* %p, align 1
%b = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a, <4 x i32> zeroinitializer) nounwind %b = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) nounwind
%c = shufflevector <4 x i32> %b, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3> %c = shufflevector <4 x i32> %b, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
%d = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %d = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
store <4 x i32> %d, <4 x i32>* %p, align 1 store <4 x i32> %d, <4 x i32>* %p, align 1
ret void ret void
} }
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

View File

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=corei7 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=corei7 | FileCheck %s
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
define <2 x i16> @good(<4 x i32>*, <4 x i8>*) { define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
; CHECK-LABEL: good: ; CHECK-LABEL: good:
@ -11,7 +11,7 @@ define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
; CHECK-NEXT: retq ; CHECK-NEXT: retq
entry: entry:
%2 = load <4 x i32>, <4 x i32>* %0, align 16 %2 = load <4 x i32>, <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>) %3 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
%4 = extractelement <4 x i32> %3, i32 0 %4 = extractelement <4 x i32> %3, i32 0
%5 = extractelement <4 x i32> %3, i32 1 %5 = extractelement <4 x i32> %3, i32 1
%6 = extractelement <4 x i32> %3, i32 2 %6 = extractelement <4 x i32> %3, i32 2
@ -30,7 +30,7 @@ define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) {
; CHECK-NEXT: retq ; CHECK-NEXT: retq
entry: entry:
%2 = load <4 x i32>, <4 x i32>* %0, align 16 %2 = load <4 x i32>, <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>) %3 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
%4 = extractelement <4 x i32> %3, i32 0 %4 = extractelement <4 x i32> %3, i32 0
%5 = extractelement <4 x i32> %3, i32 1 %5 = extractelement <4 x i32> %3, i32 1
%6 = extractelement <4 x i32> %3, i32 2 %6 = extractelement <4 x i32> %3, i32 2