forked from OSchip/llvm-project
TTI: Fix vectorization cost for bswap
This commit is contained in:
parent
bba9ba8d95
commit
b38940dfb9
|
@ -1471,6 +1471,12 @@ public:
|
||||||
SingleCallCost = TargetTransformInfo::TCC_Expensive;
|
SingleCallCost = TargetTransformInfo::TCC_Expensive;
|
||||||
break;
|
break;
|
||||||
// FIXME: ctlz, cttz, ...
|
// FIXME: ctlz, cttz, ...
|
||||||
|
case Intrinsic::bswap:
|
||||||
|
ISDs.push_back(ISD::BSWAP);
|
||||||
|
break;
|
||||||
|
case Intrinsic::bitreverse:
|
||||||
|
ISDs.push_back(ISD::BITREVERSE);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const TargetLoweringBase *TLI = getTLI();
|
const TargetLoweringBase *TLI = getTLI();
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
|
||||||
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||||
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||||
|
|
||||||
|
; GCN-LABEL: @bswap_v2i16(
|
||||||
|
; GFX7: call i16 @llvm.bswap.i16(
|
||||||
|
; GFX7: call i16 @llvm.bswap.i16(
|
||||||
|
|
||||||
|
; GFX8: call <2 x i16> @llvm.bswap.v2i16(
|
||||||
|
define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
|
||||||
|
bb:
|
||||||
|
%tmp = extractelement <2 x i16> %arg, i64 0
|
||||||
|
%tmp1 = tail call i16 @llvm.bswap.i16(i16 %tmp)
|
||||||
|
%tmp2 = insertelement <2 x i16> undef, i16 %tmp1, i64 0
|
||||||
|
%tmp3 = extractelement <2 x i16> %arg, i64 1
|
||||||
|
%tmp4 = tail call i16 @llvm.bswap.i16(i16 %tmp3)
|
||||||
|
%tmp5 = insertelement <2 x i16> %tmp2, i16 %tmp4, i64 1
|
||||||
|
ret <2 x i16> %tmp5
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: @bswap_v2i32(
|
||||||
|
; GCN: call i32 @llvm.bswap.i32
|
||||||
|
; GCN: call i32 @llvm.bswap.i32
|
||||||
|
define <2 x i32> @bswap_v2i32(<2 x i32> %arg) {
|
||||||
|
bb:
|
||||||
|
%tmp = extractelement <2 x i32> %arg, i64 0
|
||||||
|
%tmp1 = tail call i32 @llvm.bswap.i32(i32 %tmp)
|
||||||
|
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i64 0
|
||||||
|
%tmp3 = extractelement <2 x i32> %arg, i64 1
|
||||||
|
%tmp4 = tail call i32 @llvm.bswap.i32(i32 %tmp3)
|
||||||
|
%tmp5 = insertelement <2 x i32> %tmp2, i32 %tmp4, i64 1
|
||||||
|
ret <2 x i32> %tmp5
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i16 @llvm.bswap.i16(i16) #0
|
||||||
|
declare i32 @llvm.bswap.i32(i32) #0
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone speculatable willreturn }
|
Loading…
Reference in New Issue