llvm-project/llvm/test/CodeGen/PowerPC/bswap64.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
; RUN:   -mcpu=pwr9 | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
; RUN:   -mcpu=pwr9 -mattr=-altivec | FileCheck %s --check-prefix=NO-ALTIVEC

; 64-bit byte-swap intrinsic; its PowerPC lowering is what this file tests.
declare i64 @llvm.bswap.i64(i64)

; Returns the result of llvm.bswap.i64 applied to %x.
;
; The assertions below pin the exact llc output for the two invocations at the
; top of this file:
;   * -mcpu=pwr9 alone: the value is moved into a vector register, swapped
;     with xxbrd, and moved back (mtvsrdd / xxbrd / mfvsrd).
;   * -mcpu=pwr9 -mattr=-altivec: only the scalar rotldi/rldimi sequence is
;     expected, i.e. no vector instructions. This covers the case reported in
;     PR39334 (pwr9 builds that must not emit vector code).
;
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by
; hand.
define i64 @bswap64(i64 %x) {
; CHECK-LABEL: bswap64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mtvsrdd 34, 3, 3
; CHECK-NEXT:    xxbrd 0, 34
; CHECK-NEXT:    mfvsrd 3, 0
; CHECK-NEXT:    blr
;
; NO-ALTIVEC-LABEL: bswap64:
; NO-ALTIVEC:       # %bb.0: # %entry
; NO-ALTIVEC-NEXT:    rotldi 5, 3, 16
; NO-ALTIVEC-NEXT:    rotldi 4, 3, 8
; NO-ALTIVEC-NEXT:    rldimi 4, 5, 8, 48
; NO-ALTIVEC-NEXT:    rotldi 5, 3, 24
; NO-ALTIVEC-NEXT:    rldimi 4, 5, 16, 40
; NO-ALTIVEC-NEXT:    rotldi 5, 3, 32
; NO-ALTIVEC-NEXT:    rldimi 4, 5, 24, 32
; NO-ALTIVEC-NEXT:    rotldi 5, 3, 48
; NO-ALTIVEC-NEXT:    rldimi 4, 5, 40, 16
; NO-ALTIVEC-NEXT:    rotldi 5, 3, 56
; NO-ALTIVEC-NEXT:    rldimi 4, 5, 48, 8
; NO-ALTIVEC-NEXT:    rldimi 4, 3, 56, 0
; NO-ALTIVEC-NEXT:    mr 3, 4
; NO-ALTIVEC-NEXT:    blr
; The block name "entry" appears in the expected "# %entry" output above, so
; it must not be renamed without regenerating the assertions.
entry:
  ; The single operation under test: byte-swap the incoming argument.
  %0 = call i64 @llvm.bswap.i64(i64 %x)
  ret i64 %0
}
[PowerPC] Do not use vectors to codegen bswap with Altivec turned off We have efficient codegen on P9 for lowering bswap that involves moving the value into a vector reg and moving it back. However, the check under which we custom lowered it did not adequately reflect the actual requirements. It required only that the subtarget be an implementation of ISA 3.0 since all compliant implementations have to provide the vector instructions. However, the kernel builds have a valid use case for -mno-altivec -mcpu=pwr9 (i.e. don't emit vector code, don't have to save vector regs for context switch). So we should require the correct features for this lowering. Fixes https://bugs.llvm.org/show_bug.cgi?id=39334 llvm-svn: 347376 2018-11-21 10:53:50 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \`
			`; RUN: -mcpu=pwr9 \| FileCheck %s`
			`; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \`
			`; RUN: -mcpu=pwr9 -mattr=-altivec \| FileCheck %s --check-prefix=NO-ALTIVEC`
[PPC] Use xxbrd to speed up bswap64 Power doesn't have bswap instructions, so llvm generates following code sequence for bswap64. rotldi 5, 3, 16 rotldi 4, 3, 8 rotldi 9, 3, 24 rotldi 10, 3, 32 rotldi 11, 3, 48 rotldi 12, 3, 56 rldimi 4, 5, 8, 48 rldimi 4, 9, 16, 40 rldimi 4, 10, 24, 32 rldimi 4, 11, 40, 16 rldimi 4, 12, 48, 8 rldimi 4, 3, 56, 0 But Power9 has vector bswap instructions, they can also be used to speed up scalar bswap intrinsic. With this patch, bswap64 can be translated to: mtvsrdd 34, 3, 3 xxbrd 34, 34 mfvsrld 3, 34 Differential Revision: https://reviews.llvm.org/D39510 llvm-svn: 317499 2017-11-07 03:09:38 +08:00
			`declare i64 @llvm.bswap.i64(i64)`

			`define i64 @bswap64(i64 %x) {`
[PowerPC] Do not use vectors to codegen bswap with Altivec turned off We have efficient codegen on P9 for lowering bswap that involves moving the value into a vector reg and moving it back. However, the check under which we custom lowered it did not adequately reflect the actual requirements. It required only that the subtarget be an implementation of ISA 3.0 since all compliant implementations have to provide the vector instructions. However, the kernel builds have a valid use case for -mno-altivec -mcpu=pwr9 (i.e. don't emit vector code, don't have to save vector regs for context switch). So we should require the correct features for this lowering. Fixes https://bugs.llvm.org/show_bug.cgi?id=39334 llvm-svn: 347376 2018-11-21 10:53:50 +08:00			`; CHECK-LABEL: bswap64:`
			`; CHECK: # %bb.0: # %entry`
			`; CHECK-NEXT: mtvsrdd 34, 3, 3`
			`; CHECK-NEXT: xxbrd 0, 34`
			`; CHECK-NEXT: mfvsrd 3, 0`
			`; CHECK-NEXT: blr`
			`;`
			`; NO-ALTIVEC-LABEL: bswap64:`
			`; NO-ALTIVEC: # %bb.0: # %entry`
			`; NO-ALTIVEC-NEXT: rotldi 5, 3, 16`
			`; NO-ALTIVEC-NEXT: rotldi 4, 3, 8`
			`; NO-ALTIVEC-NEXT: rldimi 4, 5, 8, 48`
			`; NO-ALTIVEC-NEXT: rotldi 5, 3, 24`
			`; NO-ALTIVEC-NEXT: rldimi 4, 5, 16, 40`
			`; NO-ALTIVEC-NEXT: rotldi 5, 3, 32`
			`; NO-ALTIVEC-NEXT: rldimi 4, 5, 24, 32`
			`; NO-ALTIVEC-NEXT: rotldi 5, 3, 48`
			`; NO-ALTIVEC-NEXT: rldimi 4, 5, 40, 16`
			`; NO-ALTIVEC-NEXT: rotldi 5, 3, 56`
			`; NO-ALTIVEC-NEXT: rldimi 4, 5, 48, 8`
			`; NO-ALTIVEC-NEXT: rldimi 4, 3, 56, 0`
			`; NO-ALTIVEC-NEXT: mr 3, 4`
			`; NO-ALTIVEC-NEXT: blr`
[PPC] Use xxbrd to speed up bswap64 Power doesn't have bswap instructions, so llvm generates following code sequence for bswap64. rotldi 5, 3, 16 rotldi 4, 3, 8 rotldi 9, 3, 24 rotldi 10, 3, 32 rotldi 11, 3, 48 rotldi 12, 3, 56 rldimi 4, 5, 8, 48 rldimi 4, 9, 16, 40 rldimi 4, 10, 24, 32 rldimi 4, 11, 40, 16 rldimi 4, 12, 48, 8 rldimi 4, 3, 56, 0 But Power9 has vector bswap instructions, they can also be used to speed up scalar bswap intrinsic. With this patch, bswap64 can be translated to: mtvsrdd 34, 3, 3 xxbrd 34, 34 mfvsrld 3, 34 Differential Revision: https://reviews.llvm.org/D39510 llvm-svn: 317499 2017-11-07 03:09:38 +08:00			`entry:`
			`%0 = call i64 @llvm.bswap.i64(i64 %x)`
			`ret i64 %0`
			`}`