From f5b18923483fd6a033acd60b8036bd9d6642ccbd Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 12 Oct 2018 15:22:14 +0000 Subject: [PATCH] [AArch64][x86] add tests for trunc disguised as vector ops (PR39016); NFC These correspond to the IR transform from: D52439 llvm-svn: 344353 --- llvm/test/CodeGen/AArch64/extract-insert.ll | 118 ++++++++++++++++++++ llvm/test/CodeGen/X86/extract-insert.ll | 55 ++++++++- 2 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/extract-insert.ll diff --git a/llvm/test/CodeGen/AArch64/extract-insert.ll b/llvm/test/CodeGen/AArch64/extract-insert.ll new file mode 100644 index 000000000000..91f6518edd8c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/extract-insert.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64_be-- < %s | FileCheck %s --check-prefix=BE +; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s --check-prefix=LE + +define i32 @trunc_i64_to_i32_le(i64 %x) { +; BE-LABEL: trunc_i64_to_i32_le: +; BE: // %bb.0: +; BE-NEXT: fmov d0, x0 +; BE-NEXT: rev64 v0.4s, v0.4s +; BE-NEXT: fmov w0, s0 +; BE-NEXT: ret +; +; LE-LABEL: trunc_i64_to_i32_le: +; LE: // %bb.0: +; LE-NEXT: fmov d0, x0 +; LE-NEXT: fmov w0, s0 +; LE-NEXT: ret + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bc = bitcast <2 x i64> %ins to <4 x i32> + %ext = extractelement <4 x i32> %bc, i32 0 + ret i32 %ext +} + +define i32 @trunc_i64_to_i32_be(i64 %x) { +; BE-LABEL: trunc_i64_to_i32_be: +; BE: // %bb.0: +; BE-NEXT: fmov d0, x0 +; BE-NEXT: rev64 v0.4s, v0.4s +; BE-NEXT: mov w0, v0.s[1] +; BE-NEXT: ret +; +; LE-LABEL: trunc_i64_to_i32_be: +; LE: // %bb.0: +; LE-NEXT: fmov d0, x0 +; LE-NEXT: mov w0, v0.s[1] +; LE-NEXT: ret + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bc = bitcast <2 x i64> %ins to <4 x i32> + %ext = extractelement <4 x i32> %bc, i32 1 + ret i32 %ext +} + +define i16 @trunc_i64_to_i16_le(i64 %x) { +; BE-LABEL: trunc_i64_to_i16_le: +; BE: // %bb.0: +; BE-NEXT: fmov d0, x0 +; BE-NEXT: rev64 v0.8h, v0.8h +; BE-NEXT: umov w0, v0.h[0] +; BE-NEXT: ret +; +; LE-LABEL: trunc_i64_to_i16_le: +; LE: // %bb.0: +; LE-NEXT: fmov d0, x0 +; LE-NEXT: umov w0, v0.h[0] +; LE-NEXT: ret + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bc = bitcast <2 x i64> %ins to <8 x i16> + %ext = extractelement <8 x i16> %bc, i32 0 + ret i16 %ext +} + +define i16 @trunc_i64_to_i16_be(i64 %x) { +; BE-LABEL: trunc_i64_to_i16_be: +; BE: // %bb.0: +; BE-NEXT: fmov d0, x0 +; BE-NEXT: rev64 v0.8h, v0.8h +; BE-NEXT: umov w0, v0.h[3] +; BE-NEXT: ret +; +; LE-LABEL: trunc_i64_to_i16_be: +; LE: // %bb.0: +; LE-NEXT: fmov d0, x0 +; LE-NEXT: umov w0, v0.h[3] +; LE-NEXT: ret + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bc = bitcast <2 x i64> %ins to <8 x i16> + %ext = extractelement <8 x i16> %bc, i32 3 + ret i16 %ext +} + +define i8 @trunc_i32_to_i8_le(i32 %x) { +; BE-LABEL: trunc_i32_to_i8_le: +; BE: // %bb.0: +; BE-NEXT: fmov s0, w0 +; BE-NEXT: rev32 v0.16b, v0.16b +; BE-NEXT: umov w0, v0.b[0] +; BE-NEXT: ret +; +; LE-LABEL: trunc_i32_to_i8_le: +; LE: // %bb.0: +; LE-NEXT: fmov s0, w0 +; LE-NEXT: umov w0, v0.b[0] +; LE-NEXT: ret + %ins = insertelement <4 x i32> undef, i32 %x, i32 0 + %bc = bitcast <4 x i32> %ins to <16 x i8> + %ext = extractelement <16 x i8> %bc, i32 0 + ret i8 %ext +} + +define i8 @trunc_i32_to_i8_be(i32 %x) { +; BE-LABEL: trunc_i32_to_i8_be: +; BE: // %bb.0: +; BE-NEXT: fmov s0, w0 +; BE-NEXT: rev32 v0.16b, v0.16b +; BE-NEXT: umov w0, v0.b[3] +; BE-NEXT: ret +; +; LE-LABEL: trunc_i32_to_i8_be: +; LE: // %bb.0: +; LE-NEXT: fmov s0, w0 +; LE-NEXT: umov w0, v0.b[3] +; LE-NEXT: ret + %ins = insertelement <4 x i32> undef, i32 %x, i32 0 + %bc = bitcast <4 x i32> %ins to <16 x i8> + %ext = extractelement <16 x i8> %bc, i32 3 + ret i8 %ext +} + diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll index b3fb50de718a..2393e32ebf6c 100644 --- a/llvm/test/CodeGen/X86/extract-insert.ll +++ b/llvm/test/CodeGen/X86/extract-insert.ll @@ -29,7 +29,7 @@ define i8 @extractelt_bitcast(i32 %x) nounwind { } ; TODO: This should have folded to avoid vector ops, but the transform -; is guarded by 'hasOneUse'. That limitation apparently makes some AMDGPU +; is guarded by 'hasOneUse'. That limitation apparently makes some AMDGPU ; codegen better. define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind { @@ -60,3 +60,56 @@ define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind { ret i8 %ext } +define i32 @trunc_i64_to_i32_le(i64 %x) { +; X86-LABEL: trunc_i64_to_i32_le: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: retl +; +; X64-LABEL: trunc_i64_to_i32_le: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %xmm0 +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: retq + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bc = bitcast <2 x i64> %ins to <4 x i32> + %ext = extractelement <4 x i32> %bc, i32 0 + ret i32 %ext +} + +define i16 @trunc_i64_to_i16_le(i64 %x) { +; X86-LABEL: trunc_i64_to_i16_le: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: trunc_i64_to_i16_le: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %xmm0 +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bc = bitcast <2 x i64> %ins to <8 x i16> + %ext = extractelement <8 x i16> %bc, i32 0 + ret i16 %ext +} + +define i8 @trunc_i32_to_i8_le(i32 %x) { +; X86-LABEL: trunc_i32_to_i8_le: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: retl +; +; X64-LABEL: trunc_i32_to_i8_le: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %ins = insertelement <4 x i32> undef, i32 %x, i32 0 + %bc = bitcast <4 x i32> %ins to <16 x i8> + %ext = extractelement <16 x i8> %bc, i32 0 + ret i8 %ext +} +