forked from OSchip/llvm-project
AVX-512: Optimized BUILD_VECTOR pattern;
fixed encoding of VEXTRACTPS instruction. llvm-svn: 201134
This commit is contained in:
parent
b72ea717d0
commit
2aafc22ed9
|
@ -6070,8 +6070,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
// For AVX-length (256- and 512-bit) vectors, build the two half-width pieces and use
|
||||
// shuffles to put them in place.
|
||||
if (VT.is256BitVector()) {
|
||||
SmallVector<SDValue, 32> V;
|
||||
if (VT.is256BitVector() || VT.is512BitVector()) {
|
||||
SmallVector<SDValue, 64> V;
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
V.push_back(Op.getOperand(i));
|
||||
|
||||
|
@ -6083,7 +6083,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||
NumElems/2);
|
||||
|
||||
// Recreate the wider vector with the lower and upper part.
|
||||
return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
|
||||
if (VT.is256BitVector())
|
||||
return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
|
||||
return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
|
||||
}
|
||||
|
||||
// Let legalizer expand 2-wide build_vectors.
|
||||
|
|
|
@ -361,7 +361,7 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
|
|||
(ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
|
||||
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
|
||||
addr:$dst)]>, EVEX;
|
||||
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// AVX-512 BROADCAST
|
||||
|
|
|
@ -15,4 +15,16 @@ define <16 x i32> @test1(i32* %x) {
|
|||
define <16 x i32> @test2(<16 x i32> %x) {
|
||||
%res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
|
||||
ret <16 x i32>%res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test3
|
||||
; CHECK: vinsertf128
|
||||
; CHECK: vinsertf64x4
|
||||
; CHECK: ret
|
||||
define <16 x float> @test3(<4 x float> %a) {
|
||||
%b = extractelement <4 x float> %a, i32 2
|
||||
%c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
|
||||
%b1 = extractelement <4 x float> %a, i32 0
|
||||
%c1 = insertelement <16 x float> %c, float %b1, i32 6
|
||||
ret <16 x float>%c1
|
||||
}
|
|
@ -46,4 +46,8 @@ vmovdqu64 %zmm0, %zmm1 {%k3}
|
|||
|
||||
// CHECK: vmovd
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0xb4,0x24,0xac,0xff,0xff,0xff]
|
||||
vmovd %xmm22, -84(%rsp)
|
||||
vmovd %xmm22, -84(%rsp)
|
||||
|
||||
// CHECK: vextractps
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0x61,0x1f,0x02]
|
||||
vextractps $2, %xmm20, 124(%rcx)
|
||||
|
|
Loading…
Reference in New Issue