2013-08-24 04:21:34 +08:00
|
|
|
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.2 | FileCheck %s
|
2010-01-24 08:05:03 +08:00
|
|
|
|
|
|
|
; Tests based on PR5626 that exercise vector loads and stores.
|
|
|
|
;
|
|
|
|
|
|
|
|
; Named alias for the 3-element i32 vector type used by the add3i32 tests.
%i32vec3 = type <3 x i32>
|
|
|
|
; add3i32: loads two <3 x i32> values with 16-byte alignment, adds them and
; stores the sum through the sret pointer, also with 16-byte alignment.
; The expected assembly below is a full-width movdqa load, a paddd that takes
; the second operand from memory, and a split store: pextrd of element 2 to
; offset 8 followed by a movq of the low 8 bytes to offset 0.
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add3i32:
|
|
|
|
; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
|
|
|
|
; CHECK-NEXT: pextrd $2, %[[R0]], 8(%{{.*}})
|
|
|
|
; CHECK-NEXT: movq %[[R0]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i32vec3* %ap, align 16
|
|
|
|
%b = load %i32vec3* %bp, align 16
|
|
|
|
%x = add %i32vec3 %a, %b
|
|
|
|
store %i32vec3 %x, %i32vec3* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; add3i32_2: same <3 x i32> add as add3i32, but every load and store uses
; 8-byte alignment instead of 16. The expected assembly below therefore builds
; each operand from an 8-byte movq plus a pinsrd of element 2 from offset 8,
; adds register-to-register with paddd, and stores the result as pextrd of
; element 2 to offset 8 followed by a movq to offset 0.
define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add3i32_2:
|
|
|
|
; CHECK: movq (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pinsrd $2, 8(%{{.*}}), %[[R0]]
|
|
|
|
; CHECK-NEXT: movq (%{{.*}}), %[[R1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pinsrd $2, 8(%{{.*}}), %[[R1]]
|
|
|
|
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
|
|
|
|
; CHECK-NEXT: pextrd $2, %[[R1]], 8(%{{.*}})
|
|
|
|
; CHECK-NEXT: movq %[[R1]], (%{{.*}})
|
2010-04-24 03:41:15 +08:00
|
|
|
%a = load %i32vec3* %ap, align 8
|
|
|
|
%b = load %i32vec3* %bp, align 8
|
2010-01-24 08:05:03 +08:00
|
|
|
%x = add %i32vec3 %a, %b
|
2010-04-24 03:41:15 +08:00
|
|
|
store %i32vec3 %x, %i32vec3* %ret, align 8
|
2010-01-24 08:05:03 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Named alias for the 7-element i32 vector type used by add7i32.
%i32vec7 = type <7 x i32>
|
|
|
|
; add7i32: adds two <7 x i32> values loaded with 16-byte alignment and stores
; the sum through the sret pointer. The expected assembly below uses two
; movdqa loads (offsets 0 and 16), two paddd instructions with memory
; operands, and a store sequence of pextrd of element 2 to offset 24, movq to
; offset 16, and a full movdqa to offset 0.
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add7i32:
|
|
|
|
; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
|
|
|
|
; CHECK-NEXT: paddd 16(%{{.*}}), %[[R1]]
|
|
|
|
; CHECK-NEXT: pextrd $2, %[[R1]], 24(%{{.*}})
|
|
|
|
; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
|
|
|
|
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i32vec7* %ap, align 16
|
|
|
|
%b = load %i32vec7* %bp, align 16
|
|
|
|
%x = add %i32vec7 %a, %b
|
|
|
|
store %i32vec7 %x, %i32vec7* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Named alias for the 12-element i32 vector type used by add12i32.
%i32vec12 = type <12 x i32>
|
|
|
|
; add12i32: adds two <12 x i32> values loaded with 16-byte alignment and
; stores the sum through the sret pointer. The expected assembly below is
; three movdqa loads (offsets 0, 16, 32), three paddd instructions with memory
; operands, and three full movdqa stores issued from offset 32 down to 0.
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add12i32:
|
|
|
|
; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa 32(%{{.*}}), %[[R2:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
|
|
|
|
; CHECK-NEXT: paddd 16(%{{.*}}), %[[R1]]
|
|
|
|
; CHECK-NEXT: paddd 32(%{{.*}}), %[[R2]]
|
|
|
|
; CHECK-NEXT: movdqa %[[R2]], 32(%{{.*}})
|
|
|
|
; CHECK-NEXT: movdqa %[[R1]], 16(%{{.*}})
|
|
|
|
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i32vec12* %ap, align 16
|
|
|
|
%b = load %i32vec12* %bp, align 16
|
|
|
|
%x = add %i32vec12 %a, %b
|
|
|
|
store %i32vec12 %x, %i32vec12* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
; Named alias for the 3-element i16 vector type used by add3i16.
%i16vec3 = type <3 x i16>
|
|
|
|
; add3i16: adds two <3 x i16> values loaded with 16-byte alignment and stores
; the sum through the sret pointer. The expected assembly below loads each
; operand with pmovzxwd, adds with paddd, then stores in two pieces: pextrw $4
; of the sum to offset 4, and a movd to offset 0 of a copy of the sum that has
; been run through pshufb and pmovzxdq.
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add3i16:
|
|
|
|
; CHECK: pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
|
[x86] Make vector legalization of extloads work more like the "normal"
vector operation legalization with support for custom target lowering
and fallback to expand when it fails, and use this to implement sext and
anyext load lowering for x86 in a more principled way.
Previously, the x86 backend relied on a target DAG combine to "combine
away" sextload and extload nodes prior to legalization, or would expand
them during legalization with terrible code. This is particularly
problematic because the DAG combine relies on running over non-canonical
DAG nodes at just the right time to match several common and important
patterns. It used a combine rather than lowering because we didn't have
good lowering support, and to expose some tricks being employed to more
combine phases.
With this change it becomes a proper lowering operation, the backend
marks that it can lower these nodes, and I've added support for handling
the canonical forms that don't have direct legal representations such as
sextload of a v4i8 -> v4i64 on AVX1. With this change, our test cases
for this behavior continue to pass even after the DAG combiner beigns
running more systematically over every node.
There is some noise caused by this in the test suite where we actually
use vector extends instead of subregister extraction. This doesn't
really seem like the right thing to do, but is unlikely to be a critical
regression. We do regress in one case where by lowering to the
target-specific patterns early we were able to combine away extraneous
legal math nodes. However, this regression is completely addressed by
switching to a widening based legalization which is what I'm working
toward anyways, so I've just switched the test to that mode.
Differential Revision: http://reviews.llvm.org/D4654
llvm-svn: 213897
2014-07-25 06:09:56 +08:00
|
|
|
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
|
|
|
|
; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
|
|
|
|
; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
|
|
|
|
; CHECK-NEXT: pmovzxdq %[[R0]], %[[R0]]
|
|
|
|
; CHECK-NEXT: pextrw $4, %[[R1]], 4(%{{.*}})
|
|
|
|
; CHECK-NEXT: movd %[[R0]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i16vec3* %ap, align 16
|
|
|
|
%b = load %i16vec3* %bp, align 16
|
|
|
|
%x = add %i16vec3 %a, %b
|
|
|
|
store %i16vec3 %x, %i16vec3* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Named alias for the 4-element i16 vector type used by add4i16.
%i16vec4 = type <4 x i16>
|
|
|
|
; add4i16: adds two <4 x i16> values loaded with 16-byte alignment and stores
; the sum through the sret pointer. The expected assembly below loads each
; 8-byte operand with movq, adds with paddw, and stores the result with a
; single movq.
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add4i16:
|
2014-07-25 06:15:28 +08:00
|
|
|
; CHECK: movq (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movq (%{{.*}}), %[[R1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: paddw %[[R0]], %[[R1]]
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-NEXT: movq %[[R1]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i16vec4* %ap, align 16
|
|
|
|
%b = load %i16vec4* %bp, align 16
|
|
|
|
%x = add %i16vec4 %a, %b
|
|
|
|
store %i16vec4 %x, %i16vec4* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Named alias for the 12-element i16 vector type used by add12i16.
%i16vec12 = type <12 x i16>
|
|
|
|
; add12i16: adds two <12 x i16> values loaded with 16-byte alignment and
; stores the sum through the sret pointer. The expected assembly below uses
; two movdqa loads (offsets 0 and 16), two paddw instructions with memory
; operands, a movq store of the upper part to offset 16, and a full movdqa
; store to offset 0.
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add12i16:
|
|
|
|
; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: paddw (%{{.*}}), %[[R0]]
|
|
|
|
; CHECK-NEXT: paddw 16(%{{.*}}), %[[R1]]
|
|
|
|
; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
|
|
|
|
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i16vec12* %ap, align 16
|
|
|
|
%b = load %i16vec12* %bp, align 16
|
|
|
|
%x = add %i16vec12 %a, %b
|
|
|
|
store %i16vec12 %x, %i16vec12* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Named alias for the 18-element i16 vector type used by add18i16.
%i16vec18 = type <18 x i16>
|
|
|
|
; add18i16: adds two <18 x i16> values loaded with 16-byte alignment and
; stores the sum through the sret pointer. The expected assembly below uses
; three movdqa loads (offsets 0, 16, 32), three paddw instructions with memory
; operands, a 4-byte movd store of the tail to offset 32, and full movdqa
; stores to offsets 16 and 0.
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add18i16:
|
|
|
|
; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa 32(%{{.*}}), %[[R2:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: paddw (%{{.*}}), %[[R0]]
|
|
|
|
; CHECK-NEXT: paddw 16(%{{.*}}), %[[R1]]
|
|
|
|
; CHECK-NEXT: paddw 32(%{{.*}}), %[[R2]]
|
|
|
|
; CHECK-NEXT: movd %[[R2]], 32(%{{.*}})
|
|
|
|
; CHECK-NEXT: movdqa %[[R1]], 16(%{{.*}})
|
|
|
|
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i16vec18* %ap, align 16
|
|
|
|
%b = load %i16vec18* %bp, align 16
|
|
|
|
%x = add %i16vec18 %a, %b
|
|
|
|
store %i16vec18 %x, %i16vec18* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
; Named alias for the 3-element i8 vector type used by add3i8.
%i8vec3 = type <3 x i8>
|
|
|
|
; add3i8: adds two <3 x i8> values loaded with 16-byte alignment and stores
; the sum through the sret pointer. The expected assembly below loads each
; operand with pmovzxbd, adds with paddd, then stores in two pieces: pextrb $8
; of the sum to offset 2, and a 16-bit movw to offset 0 of a general-purpose
; register filled by movd from a copy of the sum that has been run through
; pshufb and pmovzxwq.
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add3i8:
|
|
|
|
; CHECK: pmovzxbd (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pmovzxbd (%{{.*}}), %[[R1:xmm[0-9]+]]
|
[x86] Make vector legalization of extloads work more like the "normal"
vector operation legalization with support for custom target lowering
and fallback to expand when it fails, and use this to implement sext and
anyext load lowering for x86 in a more principled way.
Previously, the x86 backend relied on a target DAG combine to "combine
away" sextload and extload nodes prior to legalization, or would expand
them during legalization with terrible code. This is particularly
problematic because the DAG combine relies on running over non-canonical
DAG nodes at just the right time to match several common and important
patterns. It used a combine rather than lowering because we didn't have
good lowering support, and to expose some tricks being employed to more
combine phases.
With this change it becomes a proper lowering operation, the backend
marks that it can lower these nodes, and I've added support for handling
the canonical forms that don't have direct legal representations such as
sextload of a v4i8 -> v4i64 on AVX1. With this change, our test cases
for this behavior continue to pass even after the DAG combiner beigns
running more systematically over every node.
There is some noise caused by this in the test suite where we actually
use vector extends instead of subregister extraction. This doesn't
really seem like the right thing to do, but is unlikely to be a critical
regression. We do regress in one case where by lowering to the
target-specific patterns early we were able to combine away extraneous
legal math nodes. However, this regression is completely addressed by
switching to a widening based legalization which is what I'm working
toward anyways, so I've just switched the test to that mode.
Differential Revision: http://reviews.llvm.org/D4654
llvm-svn: 213897
2014-07-25 06:09:56 +08:00
|
|
|
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
|
|
|
|
; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
|
|
|
|
; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
|
|
|
|
; CHECK-NEXT: pmovzxwq %[[R0]], %[[R0]]
|
|
|
|
; CHECK-NEXT: pextrb $8, %[[R1]], 2(%{{.*}})
|
|
|
|
; CHECK-NEXT: movd %[[R0]], %e[[R2:[abcd]]]x
|
|
|
|
; CHECK-NEXT: movw %[[R2]]x, (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i8vec3* %ap, align 16
|
|
|
|
%b = load %i8vec3* %bp, align 16
|
|
|
|
%x = add %i8vec3 %a, %b
|
|
|
|
store %i8vec3 %x, %i8vec3* %ret, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Named alias for the 31-element i8 vector type used by add31i8.
%i8vec31 = type <31 x i8>
|
|
|
|
; add31i8: adds two <31 x i8> values loaded with 16-byte alignment and stores
; the sum through the sret pointer. The expected assembly below uses two
; movdqa loads (offsets 0 and 16) and two paddb instructions with memory
; operands. The 15-byte tail is stored piecewise: pextrb $14 to offset 30,
; pextrw $6 to offset 28, pextrd $2 to offset 24 and movq to offset 16; the
; low 16 bytes are stored with a full movdqa to offset 0.
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-LABEL: add31i8:
|
|
|
|
; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: paddb (%{{.*}}), %[[R0]]
|
|
|
|
; CHECK-NEXT: paddb 16(%{{.*}}), %[[R1]]
|
|
|
|
; CHECK-NEXT: pextrb $14, %[[R1]], 30(%{{.*}})
|
|
|
|
; CHECK-NEXT: pextrw $6, %[[R1]], 28(%{{.*}})
|
|
|
|
; CHECK-NEXT: pextrd $2, %[[R1]], 24(%{{.*}})
|
|
|
|
; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
|
|
|
|
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
|
2010-01-24 08:05:03 +08:00
|
|
|
%a = load %i8vec31* %ap, align 16
|
|
|
|
%b = load %i8vec31* %bp, align 16
|
|
|
|
%x = add %i8vec31 %a, %b
|
|
|
|
store %i8vec31 %x, %i8vec31* %ret, align 16
|
|
|
|
ret void
|
2010-03-19 09:19:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
; Struct that packs a <3 x i8> vector together with one trailing i8 member.
%i8vec3pack = type { <3 x i8>, i8 }
|
2014-07-23 17:11:48 +08:00
|
|
|
; rot: writes the constant vector <-98, -98, -98> into the <3 x i8> member of
; %X and <1, 1, 1> into the <3 x i8> member of %rot, reloads both structs,
; extracts member 0 of each, logically shifts the first right by the second
; (lshr) and stores the <3 x i8> result through %result.
;
; The expected assembly below first covers the two constant stores (a 16-bit
; movw plus a movb of the immediate to offset 2 for each), then the reload of
; %X via pmovzxbd, a per-lane extract / shrl / insert sequence for the shift,
; and finally the split store of the result (pextrb $8 to offset 2 and a movw
; to offset 0).
;
; Note: the FileCheck variables R0 and R1 are rebound several times in the
; sequence below; each use refers to the most recent binding.
define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pack* %rot) nounwind {
|
|
|
|
; CHECK-LABEL: rot:
|
|
|
|
; CHECK: movdqa {{.*}}, %[[CONSTANT0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: movdqa {{.*}}, %[[SHUFFLE_MASK:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[CONSTANT0]]
|
[x86] Make vector legalization of extloads work more like the "normal"
vector operation legalization with support for custom target lowering
and fallback to expand when it fails, and use this to implement sext and
anyext load lowering for x86 in a more principled way.
Previously, the x86 backend relied on a target DAG combine to "combine
away" sextload and extload nodes prior to legalization, or would expand
them during legalization with terrible code. This is particularly
problematic because the DAG combine relies on running over non-canonical
DAG nodes at just the right time to match several common and important
patterns. It used a combine rather than lowering because we didn't have
good lowering support, and to expose some tricks being employed to more
combine phases.
With this change it becomes a proper lowering operation, the backend
marks that it can lower these nodes, and I've added support for handling
the canonical forms that don't have direct legal representations such as
sextload of a v4i8 -> v4i64 on AVX1. With this change, our test cases
for this behavior continue to pass even after the DAG combiner beigns
running more systematically over every node.
There is some noise caused by this in the test suite where we actually
use vector extends instead of subregister extraction. This doesn't
really seem like the right thing to do, but is unlikely to be a critical
regression. We do regress in one case where by lowering to the
target-specific patterns early we were able to combine away extraneous
legal math nodes. However, this regression is completely addressed by
switching to a widening based legalization which is what I'm working
toward anyways, so I've just switched the test to that mode.
Differential Revision: http://reviews.llvm.org/D4654
llvm-svn: 213897
2014-07-25 06:09:56 +08:00
|
|
|
; CHECK-NEXT: pmovzxwq %[[CONSTANT0]], %[[CONSTANT0]]
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-NEXT: movd %[[CONSTANT0]], %e[[R0:[abcd]]]x
|
|
|
|
; CHECK-NEXT: movw %[[R0]]x, (%[[PTR0:.*]])
|
|
|
|
; CHECK-NEXT: movb $-98, 2(%[[PTR0]])
|
|
|
|
; CHECK-NEXT: movdqa {{.*}}, %[[CONSTANT1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[CONSTANT1]]
|
[x86] Make vector legalization of extloads work more like the "normal"
vector operation legalization with support for custom target lowering
and fallback to expand when it fails, and use this to implement sext and
anyext load lowering for x86 in a more principled way.
Previously, the x86 backend relied on a target DAG combine to "combine
away" sextload and extload nodes prior to legalization, or would expand
them during legalization with terrible code. This is particularly
problematic because the DAG combine relies on running over non-canonical
DAG nodes at just the right time to match several common and important
patterns. It used a combine rather than lowering because we didn't have
good lowering support, and to expose some tricks being employed to more
combine phases.
With this change it becomes a proper lowering operation, the backend
marks that it can lower these nodes, and I've added support for handling
the canonical forms that don't have direct legal representations such as
sextload of a v4i8 -> v4i64 on AVX1. With this change, our test cases
for this behavior continue to pass even after the DAG combiner beigns
running more systematically over every node.
There is some noise caused by this in the test suite where we actually
use vector extends instead of subregister extraction. This doesn't
really seem like the right thing to do, but is unlikely to be a critical
regression. We do regress in one case where by lowering to the
target-specific patterns early we were able to combine away extraneous
legal math nodes. However, this regression is completely addressed by
switching to a widening based legalization which is what I'm working
toward anyways, so I've just switched the test to that mode.
Differential Revision: http://reviews.llvm.org/D4654
llvm-svn: 213897
2014-07-25 06:09:56 +08:00
|
|
|
; CHECK-NEXT: pmovzxwq %[[CONSTANT1]], %[[CONSTANT1]]
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-NEXT: movd %[[CONSTANT1]], %e[[R1:[abcd]]]x
|
|
|
|
; CHECK-NEXT: movw %[[R1]]x, (%[[PTR1:.*]])
|
|
|
|
; CHECK-NEXT: movb $1, 2(%[[PTR1]])
|
|
|
|
; CHECK-NEXT: pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pand {{.*}}, %[[X0]]
|
|
|
|
; CHECK-NEXT: pextrd $1, %[[X0]], %e[[R0:[abcd]]]x
|
|
|
|
; CHECK-NEXT: shrl %e[[R0]]x
|
|
|
|
; CHECK-NEXT: movd %[[X0]], %e[[R1:[abcd]]]x
|
|
|
|
; CHECK-NEXT: shrl %e[[R1]]x
|
|
|
|
; CHECK-NEXT: movd %e[[R1]]x, %[[X1:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pinsrd $1, %e[[R0]]x, %[[X1]]
|
|
|
|
; CHECK-NEXT: pextrd $2, %[[X0]], %e[[R0:[abcd]]]x
|
|
|
|
; CHECK-NEXT: shrl %e[[R0]]x
|
|
|
|
; CHECK-NEXT: pinsrd $2, %e[[R0]]x, %[[X1]]
|
|
|
|
; CHECK-NEXT: pextrd $3, %[[X0]], %e[[R0:[abcd]]]x
|
|
|
|
; CHECK-NEXT: pinsrd $3, %e[[R0]]x, %[[X1]]
|
|
|
|
; CHECK-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
|
|
|
|
; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X2]]
|
[x86] Make vector legalization of extloads work more like the "normal"
vector operation legalization with support for custom target lowering
and fallback to expand when it fails, and use this to implement sext and
anyext load lowering for x86 in a more principled way.
Previously, the x86 backend relied on a target DAG combine to "combine
away" sextload and extload nodes prior to legalization, or would expand
them during legalization with terrible code. This is particularly
problematic because the DAG combine relies on running over non-canonical
DAG nodes at just the right time to match several common and important
patterns. It used a combine rather than lowering because we didn't have
good lowering support, and to expose some tricks being employed to more
combine phases.
With this change it becomes a proper lowering operation, the backend
marks that it can lower these nodes, and I've added support for handling
the canonical forms that don't have direct legal representations such as
sextload of a v4i8 -> v4i64 on AVX1. With this change, our test cases
for this behavior continue to pass even after the DAG combiner beigns
running more systematically over every node.
There is some noise caused by this in the test suite where we actually
use vector extends instead of subregister extraction. This doesn't
really seem like the right thing to do, but is unlikely to be a critical
regression. We do regress in one case where by lowering to the
target-specific patterns early we were able to combine away extraneous
legal math nodes. However, this regression is completely addressed by
switching to a widening based legalization which is what I'm working
toward anyways, so I've just switched the test to that mode.
Differential Revision: http://reviews.llvm.org/D4654
llvm-svn: 213897
2014-07-25 06:09:56 +08:00
|
|
|
; CHECK-NEXT: pmovzxwq %[[X2]], %[[X3:xmm[0-9]+]]
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-NEXT: pextrb $8, %[[X1]], 2(%{{.*}})
|
[x86] Make vector legalization of extloads work more like the "normal"
vector operation legalization with support for custom target lowering
and fallback to expand when it fails, and use this to implement sext and
anyext load lowering for x86 in a more principled way.
Previously, the x86 backend relied on a target DAG combine to "combine
away" sextload and extload nodes prior to legalization, or would expand
them during legalization with terrible code. This is particularly
problematic because the DAG combine relies on running over non-canonical
DAG nodes at just the right time to match several common and important
patterns. It used a combine rather than lowering because we didn't have
good lowering support, and to expose some tricks being employed to more
combine phases.
With this change it becomes a proper lowering operation, the backend
marks that it can lower these nodes, and I've added support for handling
the canonical forms that don't have direct legal representations such as
sextload of a v4i8 -> v4i64 on AVX1. With this change, our test cases
for this behavior continue to pass even after the DAG combiner beigns
running more systematically over every node.
There is some noise caused by this in the test suite where we actually
use vector extends instead of subregister extraction. This doesn't
really seem like the right thing to do, but is unlikely to be a critical
regression. We do regress in one case where by lowering to the
target-specific patterns early we were able to combine away extraneous
legal math nodes. However, this regression is completely addressed by
switching to a widening based legalization which is what I'm working
toward anyways, so I've just switched the test to that mode.
Differential Revision: http://reviews.llvm.org/D4654
llvm-svn: 213897
2014-07-25 06:09:56 +08:00
|
|
|
; CHECK-NEXT: movd %[[X3]], %e[[R0:[abcd]]]x
|
2014-07-23 17:11:48 +08:00
|
|
|
; CHECK-NEXT: movw %[[R0]]x, (%{{.*}})
|
|
|
|
|
|
2010-03-19 09:19:52 +08:00
|
|
|
; IR under test: the bitcasts reinterpret each struct pointer as a pointer to
; its leading <3 x i8> member so the vector can be stored directly.
entry:
|
|
|
|
%storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
|
|
|
|
store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
|
|
|
|
%storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
|
|
|
|
store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
|
|
|
|
%tmp = load %i8vec3pack* %X
|
|
|
|
%extractVec = extractvalue %i8vec3pack %tmp, 0
|
|
|
|
%tmp2 = load %i8vec3pack* %rot
|
|
|
|
%extractVec3 = extractvalue %i8vec3pack %tmp2, 0
|
|
|
|
%shr = lshr <3 x i8> %extractVec, %extractVec3
|
|
|
|
%storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
|
|
|
|
store <3 x i8> %shr, <3 x i8>* %storetmp4
|
2014-07-23 17:11:48 +08:00
|
|
|
ret void
|
2010-03-19 09:19:52 +08:00
|
|
|
}
|
|
|
|
|