forked from OSchip/llvm-project
[InstCombine] Split off SSE4a tests.
These aren't vector demanded bits tests. More tests to follow. llvm-svn: 243223
This commit is contained in:
parent
a37e765497
commit
357b85c926
|
@ -210,130 +210,6 @@ define <4 x float> @test_select(float %f, float %g) {
|
|||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
; We should optimize these two redundant insertqi into one.
; Both calls insert 32 bits of %i at index 32, so exactly one call may remain.
; CHECK-LABEL: @testInsertTwice(
define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
; CHECK: call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
; CHECK-NOT: insertqi
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
  ret <2 x i64> %2
}
|
||||
|
||||
; The result of this insert is the second arg, since the top 64 bits of
; the result are undefined, and we copy the bottom 64 bits from the
; second arg.
; CHECK-LABEL: @testInsert64Bits(
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
  ret <2 x i64> %1
}
|
||||
|
||||
; Test the several types of ranges and ordering that exist for two insertqi

; The second insert (16 bits at index 16) lies inside the first (32 bits at
; index 0); a single 32-bit insert at index 0 is expected to remain.
; CHECK-LABEL: @testInsertContainedRange(
define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
  ret <2 x i64> %2
}
|
||||
|
||||
; Contained ranges with the inserts in the opposite order: the narrow insert
; (16 bits at index 16) comes first and the wider one (32 bits at index 0)
; second. The same single 32-bit insert at index 0 is expected.
; CHECK-LABEL: @testInsertContainedRange_2(
define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; Overlapping ranges: 32 bits at index 0 followed by 32 bits at index 16.
; The pair is expected to merge into one 48-bit insert at index 0.
; CHECK-LABEL: @testInsertOverlappingRange(
define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
  ret <2 x i64> %2
}
|
||||
|
||||
; Overlapping ranges with the inserts in the opposite order: 32 bits at
; index 16 first, then 32 bits at index 0. The same merged 48-bit insert at
; index 0 is expected.
; CHECK-LABEL: @testInsertOverlappingRange_2(
define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; Adjacent ranges: 32 bits at index 0 followed by 16 bits at index 32, which
; starts exactly where the first range ends. One 48-bit insert at index 0 is
; expected.
; CHECK-LABEL: @testInsertAdjacentRange(
define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
  ret <2 x i64> %2
}
|
||||
|
||||
; Adjacent ranges with the inserts in the opposite order: 16 bits at index 32
; first, then 32 bits at index 0. The same 48-bit insert at index 0 is
; expected.
; CHECK-LABEL: @testInsertAdjacentRange_2(
define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; Disjoint ranges: 16 bits at index 0 and 16 bits at index 32 neither touch
; nor overlap, so both calls are expected to survive unchanged.
; The first result is captured rather than spelled as a literal %1 so the
; checks do not depend on value numbering.
; CHECK-LABEL: @testInsertDisjointRange(
define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[LO:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
; CHECK: %[[RES:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %[[LO]], <2 x i64> %i, i8 16, i8 32)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
  ret <2 x i64> %2
}
|
||||
|
||||
; Disjoint ranges with the inserts in the opposite order: 16 bits at index 32
; first, then 16 bits at index 0. This body used to be an exact copy of
; testInsertDisjointRange, so the reversed order was never exercised.
; NOTE(review): both calls are expected to survive unchanged because the
; ranges neither touch nor overlap - confirm by running the test.
; CHECK-LABEL: @testInsertDisjointRange_2(
define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[HI:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
; CHECK: %[[RES:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %[[HI]], <2 x i64> %i, i8 16, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; A length operand of 0 at index 0 is expected to fold to the second argument.
; NOTE(review): presumably a zero length field denotes a full 64-bit field,
; matching testInsert64Bits - confirm against the AMD INSERTQ description.
; CHECK-LABEL: @testZeroLength(
define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
  ret <2 x i64> %1
}
|
||||
|
||||
; A length operand of 0 combined with a nonzero index (16): the call is
; expected to fold to undef.
; CHECK-LABEL: @testUndefinedInsertq_1(
define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
  ret <2 x i64> %1
}
|
||||
|
||||
; Length 48 at index 32 would run past bit 64 (32 + 48 = 80): the call is
; expected to fold to undef.
; CHECK-LABEL: @testUndefinedInsertq_2(
define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
  ret <2 x i64> %1
}
|
||||
|
||||
; Length 64 at index 16 would run past bit 64 (16 + 64 = 80): the call is
; expected to fold to undef.
; CHECK-LABEL: @testUndefinedInsertq_3(
define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
  ret <2 x i64> %1
}
|
||||
|
||||
; Declaration of the SSE4a insertqi intrinsic called by the insertqi tests
; above. The CHECK line verifies the declaration is still present in the output.
; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
|
||||
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
|
||||
|
||||
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
|
||||
define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
|
||||
; CHECK-LABEL: @test_vpermilvar_ps(
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
; We should optimize these two redundant insertqi into one.
; Both calls insert 32 bits of %i at index 32, so exactly one call may remain.
; CHECK-LABEL: @testInsertTwice(
define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
; CHECK: call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
; CHECK-NOT: insertqi
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
  ret <2 x i64> %2
}
|
||||
|
||||
; The result of this insert is the second arg, since the top 64 bits of
; the result are undefined, and we copy the bottom 64 bits from the
; second arg.
; CHECK-LABEL: @testInsert64Bits(
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
  ret <2 x i64> %1
}
|
||||
|
||||
; Test the several types of ranges and ordering that exist for two insertqi

; The second insert (16 bits at index 16) lies inside the first (32 bits at
; index 0); a single 32-bit insert at index 0 is expected to remain.
; CHECK-LABEL: @testInsertContainedRange(
define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
  ret <2 x i64> %2
}
|
||||
|
||||
; Contained ranges with the inserts in the opposite order: the narrow insert
; (16 bits at index 16) comes first and the wider one (32 bits at index 0)
; second. The same single 32-bit insert at index 0 is expected.
; CHECK-LABEL: @testInsertContainedRange_2(
define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; Overlapping ranges: 32 bits at index 0 followed by 32 bits at index 16.
; The pair is expected to merge into one 48-bit insert at index 0.
; CHECK-LABEL: @testInsertOverlappingRange(
define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
  ret <2 x i64> %2
}
|
||||
|
||||
; Overlapping ranges with the inserts in the opposite order: 32 bits at
; index 16 first, then 32 bits at index 0. The same merged 48-bit insert at
; index 0 is expected.
; CHECK-LABEL: @testInsertOverlappingRange_2(
define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; Adjacent ranges: 32 bits at index 0 followed by 16 bits at index 32, which
; starts exactly where the first range ends. One 48-bit insert at index 0 is
; expected.
; CHECK-LABEL: @testInsertAdjacentRange(
define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
  ret <2 x i64> %2
}
|
||||
|
||||
; Adjacent ranges with the inserts in the opposite order: 16 bits at index 32
; first, then 32 bits at index 0. The same 48-bit insert at index 0 is
; expected.
; CHECK-LABEL: @testInsertAdjacentRange_2(
define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; Disjoint ranges: 16 bits at index 0 and 16 bits at index 32 neither touch
; nor overlap, so both calls are expected to survive unchanged.
; The first result is captured rather than spelled as a literal %1 so the
; checks do not depend on value numbering.
; CHECK-LABEL: @testInsertDisjointRange(
define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[LO:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
; CHECK: %[[RES:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %[[LO]], <2 x i64> %i, i8 16, i8 32)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
  ret <2 x i64> %2
}
|
||||
|
||||
; Disjoint ranges with the inserts in the opposite order: 16 bits at index 32
; first, then 16 bits at index 0. This body used to be an exact copy of
; testInsertDisjointRange, so the reversed order was never exercised.
; NOTE(review): both calls are expected to survive unchanged because the
; ranges neither touch nor overlap - confirm by running the test.
; CHECK-LABEL: @testInsertDisjointRange_2(
define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[HI:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
; CHECK: %[[RES:.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %[[HI]], <2 x i64> %i, i8 16, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 0)
  ret <2 x i64> %2
}
|
||||
|
||||
; A length operand of 0 at index 0 is expected to fold to the second argument.
; NOTE(review): presumably a zero length field denotes a full 64-bit field,
; matching testInsert64Bits - confirm against the AMD INSERTQ description.
; CHECK-LABEL: @testZeroLength(
define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
  ret <2 x i64> %1
}
|
||||
|
||||
; A length operand of 0 combined with a nonzero index (16): the call is
; expected to fold to undef.
; CHECK-LABEL: @testUndefinedInsertq_1(
define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
  ret <2 x i64> %1
}
|
||||
|
||||
; Length 48 at index 32 would run past bit 64 (32 + 48 = 80): the call is
; expected to fold to undef.
; CHECK-LABEL: @testUndefinedInsertq_2(
define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
  ret <2 x i64> %1
}
|
||||
|
||||
; Length 64 at index 16 would run past bit 64 (16 + 64 = 80): the call is
; expected to fold to undef.
; CHECK-LABEL: @testUndefinedInsertq_3(
define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
  ret <2 x i64> %1
}
|
||||
|
||||
; Declaration of the SSE4a insertqi intrinsic called by every test in this
; file. The CHECK line verifies the declaration is still present in the output.
; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
|
||||
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
|
Loading…
Reference in New Issue