From ef04598e147396e7225964ae8438ecbf6554b095 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 18 Oct 2019 09:59:51 +0000 Subject: [PATCH] [X86] Regenerate memcmp tests and add X64-AVX512 common prefix Should help make the changes in D69157 clearer llvm-svn: 375215 --- llvm/test/CodeGen/X86/memcmp.ll | 95 ++++++++++++++++++++++++++++----- 1 file changed, 81 insertions(+), 14 deletions(-) diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 724828101c97..97116d991c10 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefix=X86 --check-prefix=SSE --check-prefix=X86-SSE1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=SSE --check-prefix=X86-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512BW +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,SSE,X86-SSE1 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512BW ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 @@ -1007,6 +1007,14 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind { ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: setne %al ; X64-AVX-NEXT: retq +; +; X64-AVX512-LABEL: length16_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX512-NEXT: vptest %xmm0, %xmm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp ne i32 %call, 0 ret i1 %cmp @@ -1063,6 +1071,14 @@ define i1 @length16_eq_const(i8* %X) nounwind { ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq +; +; X64-AVX512-LABEL: length16_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX512-NEXT: vptest %xmm0, %xmm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind %c = icmp eq i32 %m, 0 ret i1 %c @@ -1147,14 +1163,26 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind { ; X64-AVX-LABEL: length24_eq: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq 16(%rdi), %xmm1 -; X64-AVX-NEXT: vmovq 16(%rsi), %xmm2 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero ; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 ; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq +; +; X64-AVX512-LABEL: length24_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX512-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; X64-AVX512-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vptest %xmm0, %xmm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp @@ -1213,13 +1241,24 @@ define i1 @length24_eq_const(i8* %X) nounwind { ; X64-AVX-LABEL: length24_eq_const: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq 16(%rdi), %xmm1 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: setne %al ; X64-AVX-NEXT: retq +; +; X64-AVX512-LABEL: length24_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vptest %xmm0, %xmm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind %c = icmp ne i32 %m, 0 ret i1 %c @@ -1315,11 +1354,20 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind { ; X64-AVX2-LABEL: length32_eq: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 ; X64-AVX2-NEXT: sete %al ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length32_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp @@ -1390,6 +1438,17 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"= ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq +; +; X64-AVX512-LABEL: length32_eq_prefer128: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX512-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-AVX512-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 +; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vptest %xmm0, %xmm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp @@ -1464,6 +1523,15 @@ define i1 @length32_eq_const(i8* %X) nounwind { ; X64-AVX2-NEXT: setne %al ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length32_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind %c = icmp ne i32 %m, 0 ret i1 %c @@ -1666,7 +1734,6 @@ define i1 @huge_length_eq(i8* %X, i8* %Y) nounwind { ; X64-NEXT: sete %al ; X64-NEXT: popq %rcx ; X64-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 9223372036854775807) nounwind %c = icmp eq i32 %m, 0 ret i1 %c