From ed43f156377ac74216237290335f15576ca0c647 Mon Sep 17 00:00:00 2001
From: Igor Breger
Date: Thu, 9 Feb 2017 07:39:19 +0000
Subject: [PATCH] Add new tests for EXTRACT_VECTOR_ELT (vector of packed i8/16/i32/i64/ps/pd data)

llvm-svn: 294565
---
 .../test/CodeGen/X86/avx512-insert-extract.ll | 428 +++++++++++++++++-
 1 file changed, 427 insertions(+), 1 deletion(-)

Reviewer notes (free-form text between the diffstat and the first diff header;
it is neither commit message nor patch content):

* Purpose: adds variable-index extractelement tests for 128/256/512-bit
  vectors of i64, double, i32, float, i16 and i8, each checked under the KNL
  and SKX prefixes.
* The CHECK lines are generated by utils/update_llc_test_checks.py (see the
  NOTE line in the first hunk); regenerate them instead of editing by hand.
* The first hunk adds --check-prefix=SKX_ONLY to the existing SKX RUN line and
  adds a second SKX RUN line with -mattr=avx512vbmi and
  --check-prefix=SKX_VBMI. No check line added by this patch uses SKX_ONLY or
  SKX_VBMI; every added check uses KNL or SKX.
* The <32 x i16> and <64 x i8> variants are added commented out behind a TODO.
* NOTE(review): every "## kill:" check lists bare register names and the From:
  header carries no address. If angle-bracketed text was stripped in transit,
  these checks will not match llc output; compare against llvm-svn 294565
  before applying.

diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index cb8ed0e59a3a..ac13373c2bb0 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=SKX_ONLY %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=avx512vbmi | FileCheck --check-prefix=SKX --check-prefix=SKX_VBMI %s
 
 define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
 ; KNL-LABEL: test1:
@@ -1446,3 +1447,428 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) {
   %res = select i1 %t2, i8 3, i8 4
   ret i8 %res
 }
+
+define i64 @test_extractelement_variable_v2i64(<2 x i64> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v2i64:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    movq -24(%rsp,%rdi,8), %rax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v2i64:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    movq -24(%rsp,%rdi,8), %rax
+; SKX-NEXT:    retq
+  %t2 = extractelement <2 x i64> %t1, i32 %index
+  ret i64 %t2
+}
+
+define i64 @test_extractelement_variable_v4i64(<4 x i64> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v4i64:
+; KNL:       ## BB#0:
+; KNL-NEXT:    pushq %rbp
+; KNL-NEXT:  Lcfi3:
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:  Lcfi4:
+; KNL-NEXT:    .cfi_offset %rbp, -16
+; KNL-NEXT:    movq %rsp, %rbp
+; KNL-NEXT:  Lcfi5:
+; KNL-NEXT:    .cfi_def_cfa_register %rbp
+; KNL-NEXT:    andq $-32, %rsp
+; KNL-NEXT:    subq $64, %rsp
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %ymm0, (%rsp)
+; KNL-NEXT:    andl $3, %edi
+; KNL-NEXT:    movq (%rsp,%rdi,8), %rax
+; KNL-NEXT:    movq %rbp, %rsp
+; KNL-NEXT:    popq %rbp
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v4i64:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi0:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi1:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi2:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-32, %rsp
+; SKX-NEXT:    subq $64, %rsp
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovaps %ymm0, (%rsp)
+; SKX-NEXT:    andl $3, %edi
+; SKX-NEXT:    movq (%rsp,%rdi,8), %rax
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+  %t2 = extractelement <4 x i64> %t1, i32 %index
+  ret i64 %t2
+}
+
+define i64 @test_extractelement_variable_v8i64(<8 x i64> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v8i64:
+; KNL:       ## BB#0:
+; KNL-NEXT:    movslq %edi, %rax
+; KNL-NEXT:    vmovq %rax, %xmm1
+; KNL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v8i64:
+; SKX:       ## BB#0:
+; SKX-NEXT:    movslq %edi, %rax
+; SKX-NEXT:    vmovq %rax, %xmm1
+; SKX-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vmovq %xmm0, %rax
+; SKX-NEXT:    retq
+  %t2 = extractelement <8 x i64> %t1, i32 %index
+  ret i64 %t2
+}
+
+define double @test_extractelement_variable_v2f64(<2 x double> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v2f64:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v2f64:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; SKX-NEXT:    retq
+  %t2 = extractelement <2 x double> %t1, i32 %index
+  ret double %t2
+}
+
+define double @test_extractelement_variable_v4f64(<4 x double> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v4f64:
+; KNL:       ## BB#0:
+; KNL-NEXT:    pushq %rbp
+; KNL-NEXT:  Lcfi6:
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:  Lcfi7:
+; KNL-NEXT:    .cfi_offset %rbp, -16
+; KNL-NEXT:    movq %rsp, %rbp
+; KNL-NEXT:  Lcfi8:
+; KNL-NEXT:    .cfi_def_cfa_register %rbp
+; KNL-NEXT:    andq $-32, %rsp
+; KNL-NEXT:    subq $64, %rsp
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %ymm0, (%rsp)
+; KNL-NEXT:    andl $3, %edi
+; KNL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; KNL-NEXT:    movq %rbp, %rsp
+; KNL-NEXT:    popq %rbp
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v4f64:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi3:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi4:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi5:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-32, %rsp
+; SKX-NEXT:    subq $64, %rsp
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovaps %ymm0, (%rsp)
+; SKX-NEXT:    andl $3, %edi
+; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+  %t2 = extractelement <4 x double> %t1, i32 %index
+  ret double %t2
+}
+
+define double @test_extractelement_variable_v8f64(<8 x double> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v8f64:
+; KNL:       ## BB#0:
+; KNL-NEXT:    movslq %edi, %rax
+; KNL-NEXT:    vmovq %rax, %xmm1
+; KNL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    ## kill: %XMM0 %XMM0 %ZMM0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v8f64:
+; SKX:       ## BB#0:
+; SKX-NEXT:    movslq %edi, %rax
+; SKX-NEXT:    vmovq %rax, %xmm1
+; SKX-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    ## kill: %XMM0 %XMM0 %ZMM0
+; SKX-NEXT:    retq
+  %t2 = extractelement <8 x double> %t1, i32 %index
+  ret double %t2
+}
+
+define i32 @test_extractelement_variable_v4i32(<4 x i32> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v4i32:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $3, %edi
+; KNL-NEXT:    movl -24(%rsp,%rdi,4), %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v4i32:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $3, %edi
+; SKX-NEXT:    movl -24(%rsp,%rdi,4), %eax
+; SKX-NEXT:    retq
+  %t2 = extractelement <4 x i32> %t1, i32 %index
+  ret i32 %t2
+}
+
+define i32 @test_extractelement_variable_v8i32(<8 x i32> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v8i32:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vmovd %edi, %xmm1
+; KNL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    vmovd %xmm0, %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v8i32:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vmovd %edi, %xmm1
+; SKX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; SKX-NEXT:    vmovd %xmm0, %eax
+; SKX-NEXT:    retq
+  %t2 = extractelement <8 x i32> %t1, i32 %index
+  ret i32 %t2
+}
+
+define i32 @test_extractelement_variable_v16i32(<16 x i32> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v16i32:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vmovd %edi, %xmm1
+; KNL-NEXT:    vpermd %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vmovd %xmm0, %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v16i32:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vmovd %edi, %xmm1
+; SKX-NEXT:    vpermd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vmovd %xmm0, %eax
+; SKX-NEXT:    retq
+  %t2 = extractelement <16 x i32> %t1, i32 %index
+  ret i32 %t2
+}
+
+define float @test_extractelement_variable_v4f32(<4 x float> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v4f32:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $3, %edi
+; KNL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v4f32:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $3, %edi
+; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SKX-NEXT:    retq
+  %t2 = extractelement <4 x float> %t1, i32 %index
+  ret float %t2
+}
+
+define float @test_extractelement_variable_v8f32(<8 x float> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v8f32:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vmovd %edi, %xmm1
+; KNL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    ## kill: %XMM0 %XMM0 %YMM0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v8f32:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vmovd %edi, %xmm1
+; SKX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; SKX-NEXT:    ## kill: %XMM0 %XMM0 %YMM0
+; SKX-NEXT:    retq
+  %t2 = extractelement <8 x float> %t1, i32 %index
+  ret float %t2
+}
+
+define float @test_extractelement_variable_v16f32(<16 x float> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v16f32:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vmovd %edi, %xmm1
+; KNL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    ## kill: %XMM0 %XMM0 %ZMM0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v16f32:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vmovd %edi, %xmm1
+; SKX-NEXT:    vpermps %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    ## kill: %XMM0 %XMM0 %ZMM0
+; SKX-NEXT:    retq
+  %t2 = extractelement <16 x float> %t1, i32 %index
+  ret float %t2
+}
+
+define i16 @test_extractelement_variable_v8i16(<8 x i16> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v8i16:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $7, %edi
+; KNL-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v8i16:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovdqu %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $7, %edi
+; SKX-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
+; SKX-NEXT:    retq
+  %t2 = extractelement <8 x i16> %t1, i32 %index
+  ret i16 %t2
+}
+
+define i16 @test_extractelement_variable_v16i16(<16 x i16> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v16i16:
+; KNL:       ## BB#0:
+; KNL-NEXT:    pushq %rbp
+; KNL-NEXT:  Lcfi9:
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:  Lcfi10:
+; KNL-NEXT:    .cfi_offset %rbp, -16
+; KNL-NEXT:    movq %rsp, %rbp
+; KNL-NEXT:  Lcfi11:
+; KNL-NEXT:    .cfi_def_cfa_register %rbp
+; KNL-NEXT:    andq $-32, %rsp
+; KNL-NEXT:    subq $64, %rsp
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %ymm0, (%rsp)
+; KNL-NEXT:    andl $15, %edi
+; KNL-NEXT:    movzwl (%rsp,%rdi,2), %eax
+; KNL-NEXT:    movq %rbp, %rsp
+; KNL-NEXT:    popq %rbp
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v16i16:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi6:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi7:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi8:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-32, %rsp
+; SKX-NEXT:    subq $64, %rsp
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovdqu %ymm0, (%rsp)
+; SKX-NEXT:    andl $15, %edi
+; SKX-NEXT:    movzwl (%rsp,%rdi,2), %eax
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+  %t2 = extractelement <16 x i16> %t1, i32 %index
+  ret i16 %t2
+}
+
+; TODO: enable after the pending fix (presumably for variable-index extraction from <32 x i16>).
+;define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) {
+;  %t2 = extractelement <32 x i16> %t1, i32 %index
+;  ret i16 %t2
+;}
+
+define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v16i8:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $15, %edi
+; KNL-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax
+; KNL-NEXT:    movb (%rdi,%rax), %al
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v16i8:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovdqu %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $15, %edi
+; SKX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax
+; SKX-NEXT:    movb (%rdi,%rax), %al
+; SKX-NEXT:    retq
+  %t2 = extractelement <16 x i8> %t1, i32 %index
+  ret i8 %t2
+}
+
+define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
+; KNL-LABEL: test_extractelement_variable_v32i8:
+; KNL:       ## BB#0:
+; KNL-NEXT:    pushq %rbp
+; KNL-NEXT:  Lcfi12:
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:  Lcfi13:
+; KNL-NEXT:    .cfi_offset %rbp, -16
+; KNL-NEXT:    movq %rsp, %rbp
+; KNL-NEXT:  Lcfi14:
+; KNL-NEXT:    .cfi_def_cfa_register %rbp
+; KNL-NEXT:    andq $-32, %rsp
+; KNL-NEXT:    subq $64, %rsp
+; KNL-NEXT:    ## kill: %EDI %EDI %RDI
+; KNL-NEXT:    vmovaps %ymm0, (%rsp)
+; KNL-NEXT:    andl $31, %edi
+; KNL-NEXT:    movq %rsp, %rax
+; KNL-NEXT:    movb (%rdi,%rax), %al
+; KNL-NEXT:    movq %rbp, %rsp
+; KNL-NEXT:    popq %rbp
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_variable_v32i8:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi9:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi10:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi11:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-32, %rsp
+; SKX-NEXT:    subq $64, %rsp
+; SKX-NEXT:    ## kill: %EDI %EDI %RDI
+; SKX-NEXT:    vmovdqu %ymm0, (%rsp)
+; SKX-NEXT:    andl $31, %edi
+; SKX-NEXT:    movq %rsp, %rax
+; SKX-NEXT:    movb (%rdi,%rax), %al
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+
+  %t2 = extractelement <32 x i8> %t1, i32 %index
+  ret i8 %t2
+}
+
+; TODO: enable after the pending fix (presumably for variable-index extraction from <64 x i8>).
+;define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
+;  %t2 = extractelement <64 x i8> %t1, i32 %index
+;  ret i8 %t2
+;}