2018-11-21 10:53:50 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
|
|
|
|
; RUN: -mcpu=pwr9 | FileCheck %s
|
2019-05-14 11:11:24 +08:00
|
|
|
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
|
2018-11-21 10:53:50 +08:00
|
|
|
; RUN: -mcpu=pwr9 -mattr=-altivec | FileCheck %s --check-prefix=NO-ALTIVEC
|
[PPC] Use xxbrd to speed up bswap64
Power doesn't have bswap instructions, so LLVM generates the following code sequence for bswap64:
rotldi 5, 3, 16
rotldi 4, 3, 8
rotldi 9, 3, 24
rotldi 10, 3, 32
rotldi 11, 3, 48
rotldi 12, 3, 56
rldimi 4, 5, 8, 48
rldimi 4, 9, 16, 40
rldimi 4, 10, 24, 32
rldimi 4, 11, 40, 16
rldimi 4, 12, 48, 8
rldimi 4, 3, 56, 0
But Power9 has vector bswap instructions, which can also be used to speed up the scalar bswap intrinsic. With this patch, bswap64 can be translated to:
mtvsrdd 34, 3, 3
xxbrd 34, 34
mfvsrld 3, 34
Differential Revision: https://reviews.llvm.org/D39510
llvm-svn: 317499
2017-11-07 03:09:38 +08:00
|
|
|
|
|
|
|
; 64-bit byte-swap intrinsic: takes one i64 and returns an i64.
declare i64 @llvm.bswap.i64(i64)
|
|
|
|
|
|
|
|
; bswap64: passes its i64 argument %x to @llvm.bswap.i64 and returns the result.
;
; Two sets of expected output follow, one per llc invocation at the top of the
; file:
;   * default prefix (-mcpu=pwr9): mtvsrdd, xxbrd, mfvsrd, blr.
;   * NO-ALTIVEC prefix (-mcpu=pwr9 -mattr=-altivec): a rotldi/rldimi sequence
;     that builds the result in r4, then mr 3, 4 and blr.
;
; NOTE(review): the bare "|" lines, timestamps and commit-message text
; interleaved with the IR below are not LLVM IR or assertions; they look like
; residue from a blame/history view. Confirm and strip them before feeding
; this file to llc.
define i64 @bswap64(i64 %x) {
|
2018-11-21 10:53:50 +08:00
|
|
|
; CHECK-LABEL: bswap64:
|
|
|
|
; CHECK: # %bb.0: # %entry
|
|
|
|
; CHECK-NEXT: mtvsrdd 34, 3, 3
|
|
|
|
; CHECK-NEXT: xxbrd 0, 34
|
|
|
|
; CHECK-NEXT: mfvsrd 3, 0
|
|
|
|
; CHECK-NEXT: blr
|
|
|
|
;
|
|
|
|
; NO-ALTIVEC-LABEL: bswap64:
|
|
|
|
; NO-ALTIVEC: # %bb.0: # %entry
|
|
|
|
; NO-ALTIVEC-NEXT: rotldi 5, 3, 16
|
|
|
|
; NO-ALTIVEC-NEXT: rotldi 4, 3, 8
|
|
|
|
; NO-ALTIVEC-NEXT: rldimi 4, 5, 8, 48
|
|
|
|
; NO-ALTIVEC-NEXT: rotldi 5, 3, 24
|
|
|
|
; NO-ALTIVEC-NEXT: rldimi 4, 5, 16, 40
|
|
|
|
; NO-ALTIVEC-NEXT: rotldi 5, 3, 32
|
|
|
|
; NO-ALTIVEC-NEXT: rldimi 4, 5, 24, 32
|
|
|
|
; NO-ALTIVEC-NEXT: rotldi 5, 3, 48
|
|
|
|
; NO-ALTIVEC-NEXT: rldimi 4, 5, 40, 16
|
|
|
|
; NO-ALTIVEC-NEXT: rotldi 5, 3, 56
|
|
|
|
; NO-ALTIVEC-NEXT: rldimi 4, 5, 48, 8
|
|
|
|
; NO-ALTIVEC-NEXT: rldimi 4, 3, 56, 0
|
|
|
|
; NO-ALTIVEC-NEXT: mr 3, 4
|
|
|
|
; NO-ALTIVEC-NEXT: blr
|
[PPC] Use xxbrd to speed up bswap64
Power doesn't have bswap instructions, so llvm generates following code sequence for bswap64.
rotldi 5, 3, 16
rotldi 4, 3, 8
rotldi 9, 3, 24
rotldi 10, 3, 32
rotldi 11, 3, 48
rotldi 12, 3, 56
rldimi 4, 5, 8, 48
rldimi 4, 9, 16, 40
rldimi 4, 10, 24, 32
rldimi 4, 11, 40, 16
rldimi 4, 12, 48, 8
rldimi 4, 3, 56, 0
But Power9 has vector bswap instructions, they can also be used to speed up scalar bswap intrinsic. With this patch, bswap64 can be translated to:
mtvsrdd 34, 3, 3
xxbrd 34, 34
mfvsrld 3, 34
Differential Revision: https://reviews.llvm.org/D39510
llvm-svn: 317499
2017-11-07 03:09:38 +08:00
|
|
|
entry:
|
|
|
|
; Byte-swap the 64-bit argument through the intrinsic.
%0 = call i64 @llvm.bswap.i64(i64 %x)
|
|
|
|
ret i64 %0
|
|
|
|
}
|
|
|
|
|