forked from OSchip/llvm-project
[ARM] Remove duplicate fp16 intrinsics
These vdup and vmov float16 intrinsics were defined both in the general section and again in the fp16 section under a !aarch64 guard. The vdup_lane intrinsics were defined in both the aarch64 and !aarch64 sections, so they have been merged into a single common definition. Because these intrinsics are emitted as macros, the duplicates did not produce duplicate-definition warnings; removing them should not change the set of available intrinsics.
This commit is contained in:
parent
69d5a038b9
commit
3b09e532ee
|
@@ -530,7 +530,7 @@ def VMOV_N : WOpInst<"vmov_n", ".1",
|
|||
}
|
||||
let InstName = "" in
|
||||
def VDUP_LANE: WOpInst<"vdup_lane", ".qI",
|
||||
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
|
||||
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
|
||||
OP_DUP_LN>;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -980,7 +980,7 @@ def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
|
|||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Set all lanes to same value
|
||||
def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>;
|
||||
def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
|
||||
def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
|
||||
"csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
|
||||
OP_DUP_LN> {
|
||||
|
@@ -1644,7 +1644,8 @@ def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs
|
|||
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
|
||||
let isLaneQ = 1;
|
||||
}
|
||||
}
|
||||
|
||||
} // ArchGuard = "defined(__aarch64__)"
|
||||
|
||||
// ARMv8.2-A FP16 vector intrinsics for A32/A64.
|
||||
let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
|
||||
|
@@ -1763,15 +1764,6 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
|
|||
def VUZPH : WInst<"vuzp", "2..", "hQh">;
|
||||
def VTRNH : WInst<"vtrn", "2..", "hQh">;
|
||||
|
||||
|
||||
let ArchGuard = "!defined(__aarch64__)" in {
|
||||
// Set all lanes to same value.
|
||||
// Already implemented prior to ARMv8.2-A.
|
||||
def VMOV_NH : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>;
|
||||
def VDUP_NH : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>;
|
||||
def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>;
|
||||
}
|
||||
|
||||
// Vector Extract
|
||||
def VEXTH : WInst<"vext", "...I", "hQh">;
|
||||
|
||||
|
|
|
@@ -1754,15 +1754,15 @@ float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
|
|||
// CHECK-LABEL: define {{[^@]+}}@test_vmulh_lane_f16
|
||||
// CHECK-SAME: (half noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[__REINT_851:%.*]] = alloca <4 x half>, align 8
|
||||
// CHECK-NEXT: [[__REINT1_851:%.*]] = alloca i16, align 2
|
||||
// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
|
||||
// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
|
||||
// CHECK-NEXT: [[CONV:%.*]] = fpext half [[A]] to float
|
||||
// CHECK-NEXT: store <4 x half> [[B]], <4 x half>* [[__REINT_851]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_851]] to <4 x i16>*
|
||||
// CHECK-NEXT: store <4 x half> [[B]], <4 x half>* [[__REINT_847]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_847]] to <4 x i16>*
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
|
||||
// CHECK-NEXT: store i16 [[VGET_LANE]], i16* [[__REINT1_851]], align 2
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_851]] to half*
|
||||
// CHECK-NEXT: store i16 [[VGET_LANE]], i16* [[__REINT1_847]], align 2
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_847]] to half*
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load half, half* [[TMP2]], align 2
|
||||
// CHECK-NEXT: [[CONV2:%.*]] = fpext half [[TMP3]] to float
|
||||
// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV2]]
|
||||
|
@@ -1776,15 +1776,15 @@ float16_t test_vmulh_lane_f16(float16_t a, float16x4_t b) {
|
|||
// CHECK-LABEL: define {{[^@]+}}@test_vmulh_laneq_f16
|
||||
// CHECK-SAME: (half noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[__REINT_854:%.*]] = alloca <8 x half>, align 16
|
||||
// CHECK-NEXT: [[__REINT1_854:%.*]] = alloca i16, align 2
|
||||
// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
|
||||
// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
|
||||
// CHECK-NEXT: [[CONV:%.*]] = fpext half [[A]] to float
|
||||
// CHECK-NEXT: store <8 x half> [[B]], <8 x half>* [[__REINT_854]], align 16
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_854]] to <8 x i16>*
|
||||
// CHECK-NEXT: store <8 x half> [[B]], <8 x half>* [[__REINT_850]], align 16
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_850]] to <8 x i16>*
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
|
||||
// CHECK-NEXT: store i16 [[VGETQ_LANE]], i16* [[__REINT1_854]], align 2
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_854]] to half*
|
||||
// CHECK-NEXT: store i16 [[VGETQ_LANE]], i16* [[__REINT1_850]], align 2
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_850]] to half*
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load half, half* [[TMP2]], align 2
|
||||
// CHECK-NEXT: [[CONV2:%.*]] = fpext half [[TMP3]] to float
|
||||
// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV2]]
|
||||
|
@@ -2281,6 +2281,30 @@ float16x8_t test_vdupq_lane_f16(float16x4_t a) {
|
|||
return vdupq_lane_f16(a, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16
|
||||
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
|
||||
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
// CHECK-NEXT: ret <4 x half> [[LANE]]
|
||||
//
|
||||
float16x4_t test_vdup_laneq_f16(float16x8_t a) {
|
||||
return vdup_laneq_f16(a, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16
|
||||
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
|
||||
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK-NEXT: ret <8 x half> [[LANE]]
|
||||
//
|
||||
float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
|
||||
return vdupq_laneq_f16(a, 7);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define {{[^@]+}}@test_vext_f16
|
||||
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
|
||||
// CHECK-NEXT: entry:
|
||||
|
|
Loading…
Reference in New Issue