[AArch64] ACLE feature macro for Armv8.8-A MOPS

This introduces the new __ARM_FEATURE_MOPS ACLE feature test macro,
which signals the availability of the new Armv8.8-A/Armv9.3-A
instructions for standardising memcpy, memset and memmove operations.

This patch supersedes the one from https://reviews.llvm.org/D116160.

Differential Revision: https://reviews.llvm.org/D118199
This commit is contained in:
tyb0807 2022-01-25 22:51:49 +00:00
parent 3dce6b329c
commit c70b935089
3 changed files with 54 additions and 111 deletions

View File

@ -226,6 +226,8 @@ void AArch64TargetInfo::getTargetDefinesARMV87A(const LangOptions &Opts,
// Emits the predefined macros for the Armv8.8-A architecture level.
// Defines __ARM_FEATURE_MOPS as "1" unconditionally, then delegates to
// getTargetDefinesARMV87A so the Armv8.7-A defines are emitted as well.
// Opts and Builder are forwarded unchanged to that call.
void AArch64TargetInfo::getTargetDefinesARMV88A(const LangOptions &Opts,
MacroBuilder &Builder) const {
// FIXME: this does not handle the case where MOPS is disabled using +nomops
// (the define below is not guarded by any feature flag).
Builder.defineMacro("__ARM_FEATURE_MOPS", "1");
// Also include the Armv8.7 defines
getTargetDefinesARMV87A(Opts, Builder);
}
@ -435,6 +437,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasRandGen)
Builder.defineMacro("__ARM_FEATURE_RNG", "1");
if (HasMOPS)
Builder.defineMacro("__ARM_FEATURE_MOPS", "1");
switch (ArchKind) {
default:
break;
@ -662,6 +667,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasFlagM = true;
if (Feature == "+hbc")
HasHBC = true;
if (Feature == "+mops")
HasMOPS = true;
}
setDataLayout();

View File

@ -1,153 +1,77 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -w -S -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-MOPS %s
// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -w -S -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-NOMOPS %s
// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mte -w -S -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-NOMOPS %s
// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -w -S -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-NOMOPS %s
// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -S -emit-llvm -o - %s | FileCheck %s
#define __ARM_FEATURE_MOPS 1
#include <arm_acle.h>
#include <stddef.h>
// CHECK-LABEL: @bzero_0(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0)
// CHECK-NEXT: ret i8* [[TMP1]]
//
// CHECK-LABEL: @bzero_0(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: zero fill value with a literal size of 0. Forwards dst to
// __arm_mops_memset_tag and returns the pointer that call yields.
void *bzero_0(void *dst) {
return __arm_mops_memset_tag(dst, 0, 0);
}
// CHECK-LABEL: @bzero_1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1)
// CHECK-NEXT: ret i8* [[TMP1]]
//
// CHECK-LABEL: @bzero_1(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: zero fill value with a literal size of 1. Forwards dst to
// __arm_mops_memset_tag and returns the pointer that call yields.
void *bzero_1(void *dst) {
return __arm_mops_memset_tag(dst, 0, 1);
}
// CHECK-LABEL: @bzero_10(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10)
// CHECK-NEXT: ret i8* [[TMP1]]
//
// CHECK-LABEL: @bzero_10(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: zero fill value with a literal size of 10. Forwards dst to
// __arm_mops_memset_tag and returns the pointer that call yields.
void *bzero_10(void *dst) {
return __arm_mops_memset_tag(dst, 0, 10);
}
// CHECK-LABEL: @bzero_10000(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000)
// CHECK-NEXT: ret i8* [[TMP1]]
//
// CHECK-LABEL: @bzero_10000(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: zero fill value with a literal size of 10000. Forwards dst to
// __arm_mops_memset_tag and returns the pointer that call yields.
void *bzero_10000(void *dst) {
return __arm_mops_memset_tag(dst, 0, 10000);
}
// CHECK-LABEL: @bzero_n(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]])
// CHECK-NEXT: ret i8* [[TMP2]]
//
// CHECK-LABEL: @bzero_n(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: zero fill value with a size supplied by the caller at run time.
// Forwards dst and size to __arm_mops_memset_tag and returns the pointer that
// call yields.
void *bzero_n(void *dst, size_t size) {
return __arm_mops_memset_tag(dst, 0, size);
}
// CHECK-LABEL: @memset_0(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0)
// CHECK-NEXT: ret i8* [[TMP3]]
//
// CHECK-LABEL: @memset_0(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: caller-supplied int fill value with a literal size of 0.
// Forwards dst and value to __arm_mops_memset_tag and returns the pointer that
// call yields.
void *memset_0(void *dst, int value) {
return __arm_mops_memset_tag(dst, value, 0);
}
// CHECK-LABEL: @memset_1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1)
// CHECK-NEXT: ret i8* [[TMP3]]
//
// CHECK-LABEL: @memset_1(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: caller-supplied int fill value with a literal size of 1.
// Forwards dst and value to __arm_mops_memset_tag and returns the pointer that
// call yields.
void *memset_1(void *dst, int value) {
return __arm_mops_memset_tag(dst, value, 1);
}
// CHECK-LABEL: @memset_10(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10)
// CHECK-NEXT: ret i8* [[TMP3]]
//
// CHECK-LABEL: @memset_10(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: caller-supplied int fill value with a literal size of 10.
// Forwards dst and value to __arm_mops_memset_tag and returns the pointer that
// call yields.
void *memset_10(void *dst, int value) {
return __arm_mops_memset_tag(dst, value, 10);
}
// CHECK-LABEL: @memset_10000(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000)
// CHECK-NEXT: ret i8* [[TMP3]]
//
// CHECK-LABEL: @memset_10000(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: caller-supplied int fill value with a literal size of 10000.
// Forwards dst and value to __arm_mops_memset_tag and returns the pointer that
// call yields.
void *memset_10000(void *dst, int value) {
return __arm_mops_memset_tag(dst, value, 10000);
}
// CHECK-LABEL: @memset_n(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8
// CHECK-NEXT: [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]])
// CHECK-NEXT: ret i8* [[TMP4]]
//
// CHECK-LABEL: @memset_n(
// CHECK-MOPS: @llvm.aarch64.mops.memset.tag
// CHECK-NOMOPS-NOT: @llvm.aarch64.mops.memset.tag
// Test input: both the int fill value and the size are supplied by the caller
// at run time. Forwards all three arguments to __arm_mops_memset_tag and
// returns the pointer that call yields.
void *memset_n(void *dst, int value, size_t size) {
return __arm_mops_memset_tag(dst, value, size);
}

View File

@ -510,9 +510,21 @@
// CHECK-NO-SVE-VECTOR-BITS-NOT: __ARM_FEATURE_SVE_BITS
// CHECK-NO-SVE-VECTOR-BITS-NOT: __ARM_FEATURE_SVE_VECTOR_OPERATORS
// ================== Check Largse System Extensions (LSE)
// ================== Check Large System Extensions (LSE)
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
// RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
// RUN: %clang -target arm64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
// CHECK-LSE: __ARM_FEATURE_ATOMICS 1
// ================== Check Armv8.8-A/Armv9.3-A memcpy and memset acceleration instructions (MOPS)
// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
// CHECK-MOPS: __ARM_FEATURE_MOPS 1
// CHECK-NOMOPS-NOT: __ARM_FEATURE_MOPS 1