forked from OSchip/llvm-project
[AArch64] Lower bitreverse in ISel
Add lowering support for vector bitreverse on v8i8 and v16i8. Previously, lowering bitreverse on these types would expand it into a series of other instructions. This patch makes it produce a single rbit instruction instead. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D102397
This commit is contained in:
parent
888ce70af2
commit
50511df32e
|
@ -10879,7 +10879,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
case NEON::BI__builtin_neon_vrbit_v:
|
||||
case NEON::BI__builtin_neon_vrbitq_v: {
|
||||
Int = Intrinsic::aarch64_neon_rbit;
|
||||
Int = Intrinsic::bitreverse;
|
||||
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
|
||||
}
|
||||
case NEON::BI__builtin_neon_vaddv_u8:
|
||||
|
|
|
@ -1766,42 +1766,42 @@ poly8x16_t test_vmvnq_p8(poly8x16_t a) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: @test_vrbit_s8(
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a)
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
|
||||
// CHECK: ret <8 x i8> [[VRBIT_I]]
|
||||
int8x8_t test_vrbit_s8(int8x8_t a) {
|
||||
return vrbit_s8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vrbitq_s8(
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a)
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
|
||||
// CHECK: ret <16 x i8> [[VRBIT_I]]
|
||||
int8x16_t test_vrbitq_s8(int8x16_t a) {
|
||||
return vrbitq_s8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vrbit_u8(
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a)
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
|
||||
// CHECK: ret <8 x i8> [[VRBIT_I]]
|
||||
uint8x8_t test_vrbit_u8(uint8x8_t a) {
|
||||
return vrbit_u8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vrbitq_u8(
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a)
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
|
||||
// CHECK: ret <16 x i8> [[VRBIT_I]]
|
||||
uint8x16_t test_vrbitq_u8(uint8x16_t a) {
|
||||
return vrbitq_u8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vrbit_p8(
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a)
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
|
||||
// CHECK: ret <8 x i8> [[VRBIT_I]]
|
||||
poly8x8_t test_vrbit_p8(poly8x8_t a) {
|
||||
return vrbit_p8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vrbitq_p8(
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a)
|
||||
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
|
||||
// CHECK: ret <16 x i8> [[VRBIT_I]]
|
||||
poly8x16_t test_vrbitq_p8(poly8x16_t a) {
|
||||
return vrbitq_p8(a);
|
||||
|
|
|
@ -444,9 +444,6 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
|
|||
def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
|
||||
def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic;
|
||||
|
||||
// Vector Bitwise Reverse
|
||||
def int_aarch64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic;
|
||||
|
||||
// Vector Conversions Between Half-Precision and Single-Precision.
|
||||
def int_aarch64_neon_vcvtfp2hf
|
||||
: DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
|
|
|
@ -553,6 +553,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
F->arg_begin()->getType());
|
||||
return true;
|
||||
}
|
||||
if (Name.startswith("aarch64.neon.rbit")) {
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
|
||||
F->arg_begin()->getType());
|
||||
return true;
|
||||
}
|
||||
if (Name.startswith("arm.neon.vclz")) {
|
||||
Type* args[2] = {
|
||||
F->arg_begin()->getType(),
|
||||
|
|
|
@ -1023,6 +1023,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
|
||||
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
|
||||
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
|
||||
|
||||
// AArch64 doesn't have MUL.2d:
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
|
||||
|
|
|
@ -4166,7 +4166,7 @@ def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
|
|||
def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
|
||||
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
|
||||
|
||||
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
|
||||
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
|
||||
defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
|
||||
defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
|
||||
defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
|
||||
|
|
|
@ -4,7 +4,7 @@ define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
|
|||
;CHECK-LABEL: rbit_8b:
|
||||
;CHECK: rbit.8b
|
||||
%tmp1 = load <8 x i8>, <8 x i8>* %A
|
||||
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1)
|
||||
%tmp3 = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %tmp1)
|
||||
ret <8 x i8> %tmp3
|
||||
}
|
||||
|
||||
|
@ -12,12 +12,12 @@ define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
|
|||
;CHECK-LABEL: rbit_16b:
|
||||
;CHECK: rbit.16b
|
||||
%tmp1 = load <16 x i8>, <16 x i8>* %A
|
||||
%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1)
|
||||
%tmp3 = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %tmp1)
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
|
||||
declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
|
||||
declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) nounwind readnone
|
||||
declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
|
||||
;CHECK-LABEL: sxtl8h:
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s
|
||||
|
||||
; These tests just check that the plumbing is in place for @llvm.bitreverse.
|
||||
|
@ -6,13 +7,16 @@ declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
|
|||
|
||||
define <2 x i16> @f(<2 x i16> %a) {
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: fmov [[REG1:w[0-9]+]], s0
|
||||
; CHECK-DAG: rbit [[REG2:w[0-9]+]], [[REG1]]
|
||||
; CHECK-DAG: fmov s0, [[REG2]]
|
||||
; CHECK-DAG: mov [[REG3:w[0-9]+]], v0.s[1]
|
||||
; CHECK-DAG: rbit [[REG4:w[0-9]+]], [[REG3]]
|
||||
; CHECK-DAG: mov v0.s[1], [[REG4]]
|
||||
; CHECK-DAG: ushr v0.2s, v0.2s, #16
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: mov w9, v0.s[1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: rbit w8, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
|
||||
ret <2 x i16> %b
|
||||
}
|
||||
|
@ -21,41 +25,161 @@ declare i8 @llvm.bitreverse.i8(i8) readnone
|
|||
|
||||
define i8 @g(i8 %a) {
|
||||
; CHECK-LABEL: g:
|
||||
; CHECK: rbit [[REG:w[0-9]+]], w0
|
||||
; CHECK-NEXT: lsr w0, [[REG]], #24
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit w8, w0
|
||||
; CHECK-NEXT: lsr w0, w8, #24
|
||||
; CHECK-NEXT: ret
|
||||
%b = call i8 @llvm.bitreverse.i8(i8 %a)
|
||||
ret i8 %b
|
||||
}
|
||||
|
||||
declare i16 @llvm.bitreverse.i16(i16) readnone
|
||||
|
||||
define i16 @g_16(i16 %a) {
|
||||
; CHECK-LABEL: g_16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit w8, w0
|
||||
; CHECK-NEXT: lsr w0, w8, #16
|
||||
; CHECK-NEXT: ret
|
||||
%b = call i16 @llvm.bitreverse.i16(i16 %a)
|
||||
ret i16 %b
|
||||
}
|
||||
|
||||
declare i32 @llvm.bitreverse.i32(i32) readnone
|
||||
|
||||
define i32 @g_32(i32 %a) {
|
||||
; CHECK-LABEL: g_32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit w0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%b = call i32 @llvm.bitreverse.i32(i32 %a)
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
declare i64 @llvm.bitreverse.i64(i64) readnone
|
||||
|
||||
define i64 @g_64(i64 %a) {
|
||||
; CHECK-LABEL: g_64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit x0, x0
|
||||
; CHECK-NEXT: ret
|
||||
%b = call i64 @llvm.bitreverse.i64(i64 %a)
|
||||
ret i64 %b
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone
|
||||
|
||||
define <8 x i8> @g_vec(<8 x i8> %a) {
|
||||
; CHECK-DAG: movi [[M1:v.*]], #15
|
||||
; CHECK-DAG: movi [[M2:v.*]], #240
|
||||
; CHECK: and [[A1:v.*]], v0.8b, [[M1]]
|
||||
; CHECK: and [[A2:v.*]], v0.8b, [[M2]]
|
||||
; CHECK-DAG: shl [[L4:v.*]], [[A1]], #4
|
||||
; CHECK-DAG: ushr [[R4:v.*]], [[A2]], #4
|
||||
; CHECK-DAG: orr [[V4:v.*]], [[R4]], [[L4]]
|
||||
|
||||
; CHECK-DAG: movi [[M3:v.*]], #51
|
||||
; CHECK-DAG: movi [[M4:v.*]], #204
|
||||
; CHECK: and [[A3:v.*]], [[V4]], [[M3]]
|
||||
; CHECK: and [[A4:v.*]], [[V4]], [[M4]]
|
||||
; CHECK-DAG: shl [[L2:v.*]], [[A3]], #2
|
||||
; CHECK-DAG: ushr [[R2:v.*]], [[A4]], #2
|
||||
; CHECK-DAG: orr [[V2:v.*]], [[R2]], [[L2]]
|
||||
|
||||
; CHECK-DAG: movi [[M5:v.*]], #85
|
||||
; CHECK-DAG: movi [[M6:v.*]], #170
|
||||
; CHECK: and [[A5:v.*]], [[V2]], [[M5]]
|
||||
; CHECK: and [[A6:v.*]], [[V2]], [[M6]]
|
||||
; CHECK-DAG: shl [[L1:v.*]], [[A5]], #1
|
||||
; CHECK-DAG: ushr [[R1:v.*]], [[A6]], #1
|
||||
; CHECK: orr [[V1:v.*]], [[R1]], [[L1]]
|
||||
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: g_vec:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
|
||||
ret <8 x i8> %b
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) readnone
|
||||
|
||||
define <16 x i8> @g_vec_16x8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: g_vec_16x8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
|
||||
ret <16 x i8> %b
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>) readnone
|
||||
|
||||
define <4 x i16> @g_vec_4x16(<4 x i16> %a) {
|
||||
; CHECK-LABEL: g_vec_4x16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev16 v0.8b, v0.8b
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a)
|
||||
ret <4 x i16> %b
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) readnone
|
||||
|
||||
define <8 x i16> @g_vec_8x16(<8 x i16> %a) {
|
||||
; CHECK-LABEL: g_vec_8x16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev16 v0.16b, v0.16b
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
|
||||
ret <8 x i16> %b
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) readnone
|
||||
|
||||
define <2 x i32> @g_vec_2x32(<2 x i32> %a) {
|
||||
; CHECK-LABEL: g_vec_2x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: mov w9, v0.s[1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: rbit w8, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%b = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
|
||||
ret <2 x i32> %b
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
|
||||
|
||||
define <4 x i32> @g_vec_4x32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: g_vec_4x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov w10, s0
|
||||
; CHECK-NEXT: mov w8, v0.s[1]
|
||||
; CHECK-NEXT: rbit w10, w10
|
||||
; CHECK-NEXT: mov w9, v0.s[2]
|
||||
; CHECK-NEXT: mov w11, v0.s[3]
|
||||
; CHECK-NEXT: fmov s0, w10
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: rbit w9, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: mov v0.s[2], w9
|
||||
; CHECK-NEXT: rbit w8, w11
|
||||
; CHECK-NEXT: mov v0.s[3], w8
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
|
||||
ret <4 x i32> %b
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.bitreverse.v1i64(<1 x i64>) readnone
|
||||
|
||||
define <1 x i64> @g_vec_1x64(<1 x i64> %a) {
|
||||
; CHECK-LABEL: g_vec_1x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %a)
|
||||
ret <1 x i64> %b
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) readnone
|
||||
|
||||
define <2 x i64> @g_vec_2x64(<2 x i64> %a) {
|
||||
; CHECK-LABEL: g_vec_2x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: rbit x8, x9
|
||||
; CHECK-NEXT: mov v0.d[1], x8
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
|
||||
ret <2 x i64> %b
|
||||
}
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-eabi -mattr=+fullfp16 %s -o - | FileCheck %s
|
||||
|
||||
; The llvm.aarch64.neon.rbit intrinsic should be auto-upgraded to the
|
||||
; target-independent bitreverse intrinsic.
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i8> @rbit_8x8(<8 x i8> %A) nounwind {
|
||||
; CHECK-LABEL: rbit_8x8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %A)
|
||||
ret <8 x i8> %tmp3
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
|
||||
|
||||
define <16 x i8> @rbit_16x8(<16 x i8> %A) nounwind {
|
||||
; CHECK-LABEL: rbit_16x8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %A)
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.aarch64.neon.rbit.v4i16(<4 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i16> @rbit_4x16(<4 x i16> %A) nounwind {
|
||||
; CHECK-LABEL: rbit_4x16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev16 v0.8b, v0.8b
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <4 x i16> @llvm.aarch64.neon.rbit.v4i16(<4 x i16> %A)
|
||||
ret <4 x i16> %tmp3
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.aarch64.neon.rbit.v8i16(<8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @rbit_8x16(<8 x i16> %A) {
|
||||
; CHECK-LABEL: rbit_8x16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev16 v0.16b, v0.16b
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <8 x i16> @llvm.aarch64.neon.rbit.v8i16(<8 x i16> %A)
|
||||
ret <8 x i16> %tmp3
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32>) nounwind readnone
|
||||
|
||||
define <2 x i32> @rbit_2x32(<2 x i32> %A) {
|
||||
; CHECK-LABEL: rbit_2x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: mov w9, v0.s[1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: rbit w8, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32> %A)
|
||||
ret <2 x i32> %tmp3
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32>) nounwind readnone
|
||||
|
||||
define <4 x i32> @rbit_4x32(<4 x i32> %A) {
|
||||
; CHECK-LABEL: rbit_4x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov w10, s0
|
||||
; CHECK-NEXT: mov w8, v0.s[1]
|
||||
; CHECK-NEXT: rbit w10, w10
|
||||
; CHECK-NEXT: mov w9, v0.s[2]
|
||||
; CHECK-NEXT: mov w11, v0.s[3]
|
||||
; CHECK-NEXT: fmov s0, w10
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: rbit w9, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: mov v0.s[2], w9
|
||||
; CHECK-NEXT: rbit w8, w11
|
||||
; CHECK-NEXT: mov v0.s[3], w8
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32> %A)
|
||||
ret <4 x i32> %tmp3
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64>) readnone
|
||||
|
||||
define <1 x i64> @rbit_1x64(<1 x i64> %A) {
|
||||
; CHECK-LABEL: rbit_1x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64> %A)
|
||||
ret <1 x i64> %tmp3
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64>) readnone
|
||||
|
||||
define <2 x i64> @rbit_2x64(<2 x i64> %A) {
|
||||
; CHECK-LABEL: rbit_2x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: rbit x8, x9
|
||||
; CHECK-NEXT: mov v0.d[1], x8
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64> %A)
|
||||
ret <2 x i64> %tmp3
|
||||
}
|
Loading…
Reference in New Issue