RISC-V: hwprobe: Support probing of misaligned access performance
This allows userspace to select various routines to use based on the performance of misaligned access on the target hardware. Rather than adding DT bindings, this change taps into the alternatives mechanism used to probe CPU errata. Add a new function pointer alongside the vendor-specific errata_patch_func() that probes for desirable errata (otherwise known as "features"). Unlike the errata_patch_func(), this function is called on each CPU as it comes up, so it can save feature information per-CPU. The T-head C906 has fast unaligned access, both as defined by GCC [1], and in performing a basic benchmark, which determined that byte copies are >50% slower than a misaligned word copy of the same data size (source for this test at [2]): bytecopy size f000 count 50000 offset 0 took 31664899 us wordcopy size f000 count 50000 offset 0 took 5180919 us wordcopy size f000 count 50000 offset 1 took 13416949 us [1] https://github.com/gcc-mirror/gcc/blob/master/gcc/config/riscv/riscv.cc#L353 [2] https://pastebin.com/EPXvDHSW Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com> Signed-off-by: Evan Green <evan@rivosinc.com> Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Tested-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Reviewed-by: Paul Walmsley <paul.walmsley@sifive.com> Link: https://lore.kernel.org/r/20230407231103.2622178-5-evan@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
parent
00e76e2c6a
commit
62a31d6e38
|
@ -63,3 +63,24 @@ The following keys are defined:
|
|||
|
||||
* :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
|
||||
by version 2.2 of the RISC-V ISA manual.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
|
||||
information about the selected set of processors.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
|
||||
accesses is unknown.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
|
||||
emulated via software, either in or below the kernel. These accesses are
|
||||
always extremely slow.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
|
||||
in hardware, but are slower than the cooresponding aligned accesses
|
||||
sequences.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
|
||||
in hardware and are faster than the cooresponding aligned accesses
|
||||
sequences.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
|
||||
not supported at all and will generate a misaligned address fault.
|
||||
|
|
|
@ -11,7 +11,9 @@
|
|||
#include <linux/uaccess.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/errata_list.h>
|
||||
#include <asm/hwprobe.h>
|
||||
#include <asm/patch.h>
|
||||
#include <asm/vendorid_list.h>
|
||||
|
||||
|
@ -115,3 +117,11 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
|
|||
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
|
||||
local_flush_icache_all();
|
||||
}
|
||||
|
||||
void __init_or_module thead_feature_probe_func(unsigned int cpu,
|
||||
unsigned long archid,
|
||||
unsigned long impid)
|
||||
{
|
||||
if ((archid == 0) && (impid == 0))
|
||||
per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#define ALT_OLD_PTR(a) __ALT_PTR(a, old_offset)
|
||||
#define ALT_ALT_PTR(a) __ALT_PTR(a, alt_offset)
|
||||
|
||||
void __init probe_vendor_features(unsigned int cpu);
|
||||
void __init apply_boot_alternatives(void);
|
||||
void __init apply_early_boot_alternatives(void);
|
||||
void apply_module_alternatives(void *start, size_t length);
|
||||
|
@ -55,11 +56,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
|
|||
unsigned long archid, unsigned long impid,
|
||||
unsigned int stage);
|
||||
|
||||
void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
|
||||
unsigned long impid);
|
||||
|
||||
void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
|
||||
unsigned int stage);
|
||||
|
||||
#else /* CONFIG_RISCV_ALTERNATIVE */
|
||||
|
||||
static inline void probe_vendor_features(unsigned int cpu) { }
|
||||
static inline void apply_boot_alternatives(void) { }
|
||||
static inline void apply_early_boot_alternatives(void) { }
|
||||
static inline void apply_module_alternatives(void *start, size_t length) { }
|
||||
|
|
|
@ -18,4 +18,6 @@ struct riscv_cpuinfo {
|
|||
|
||||
DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
|
||||
|
||||
DECLARE_PER_CPU(long, misaligned_access_speed);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,6 @@
|
|||
|
||||
#include <uapi/asm/hwprobe.h>
|
||||
|
||||
#define RISCV_HWPROBE_MAX_KEY 4
|
||||
#define RISCV_HWPROBE_MAX_KEY 5
|
||||
|
||||
#endif
|
||||
|
|
|
@ -25,6 +25,13 @@ struct riscv_hwprobe {
|
|||
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
|
||||
#define RISCV_HWPROBE_IMA_FD (1 << 0)
|
||||
#define RISCV_HWPROBE_IMA_C (1 << 1)
|
||||
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
|
||||
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
|
||||
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
|
||||
#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
|
||||
#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
|
||||
#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
|
||||
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
|
||||
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -27,6 +27,8 @@ struct cpu_manufacturer_info_t {
|
|||
void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
|
||||
unsigned long archid, unsigned long impid,
|
||||
unsigned int stage);
|
||||
void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
|
||||
unsigned long impid);
|
||||
};
|
||||
|
||||
static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
|
||||
|
@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
|
|||
cpu_mfr_info->imp_id = sbi_get_mimpid();
|
||||
#endif
|
||||
|
||||
cpu_mfr_info->feature_probe_func = NULL;
|
||||
switch (cpu_mfr_info->vendor_id) {
|
||||
#ifdef CONFIG_ERRATA_SIFIVE
|
||||
case SIFIVE_VENDOR_ID:
|
||||
|
@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
|
|||
#ifdef CONFIG_ERRATA_THEAD
|
||||
case THEAD_VENDOR_ID:
|
||||
cpu_mfr_info->patch_func = thead_errata_patch_func;
|
||||
cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
|
@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
|
|||
}
|
||||
}
|
||||
|
||||
/* Called on each CPU as it starts */
|
||||
void __init_or_module probe_vendor_features(unsigned int cpu)
|
||||
{
|
||||
struct cpu_manufacturer_info_t cpu_mfr_info;
|
||||
|
||||
riscv_fill_cpu_mfr_info(&cpu_mfr_info);
|
||||
if (!cpu_mfr_info.feature_probe_func)
|
||||
return;
|
||||
|
||||
cpu_mfr_info.feature_probe_func(cpu,
|
||||
cpu_mfr_info.arch_id,
|
||||
cpu_mfr_info.imp_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called very early in the boot process (directly after we run
|
||||
* a feature detect on the boot CPU). No need to worry about other CPUs
|
||||
|
@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void)
|
|||
/* If called on non-boot cpu things could go wrong */
|
||||
WARN_ON(smp_processor_id() != 0);
|
||||
|
||||
probe_vendor_features(0);
|
||||
_apply_alternatives((struct alt_entry *)__alt_start,
|
||||
(struct alt_entry *)__alt_end,
|
||||
RISCV_ALTERNATIVES_BOOT);
|
||||
|
|
|
@ -30,6 +30,9 @@ unsigned long elf_hwcap __read_mostly;
|
|||
/* Host ISA bitmap */
|
||||
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
|
||||
|
||||
/* Performance information */
|
||||
DEFINE_PER_CPU(long, misaligned_access_speed);
|
||||
|
||||
/**
|
||||
* riscv_isa_extension_base() - Get base extension word
|
||||
*
|
||||
|
|
|
@ -168,6 +168,7 @@ asmlinkage __visible void smp_callin(void)
|
|||
notify_cpu_starting(curr_cpuid);
|
||||
numa_add_cpu(curr_cpuid);
|
||||
set_cpu_online(curr_cpuid, 1);
|
||||
probe_vendor_features(curr_cpuid);
|
||||
|
||||
/*
|
||||
* Remote TLB flushes are ignored while the CPU is offline, so emit
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/hwprobe.h>
|
||||
#include <asm/sbi.h>
|
||||
#include <asm/switch_to.h>
|
||||
|
@ -117,6 +118,29 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair,
|
|||
pair->value = id;
|
||||
}
|
||||
|
||||
static u64 hwprobe_misaligned(const struct cpumask *cpus)
|
||||
{
|
||||
int cpu;
|
||||
u64 perf = -1ULL;
|
||||
|
||||
for_each_cpu(cpu, cpus) {
|
||||
int this_perf = per_cpu(misaligned_access_speed, cpu);
|
||||
|
||||
if (perf == -1ULL)
|
||||
perf = this_perf;
|
||||
|
||||
if (perf != this_perf) {
|
||||
perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (perf == -1ULL)
|
||||
return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
|
||||
|
||||
return perf;
|
||||
}
|
||||
|
||||
static void hwprobe_one_pair(struct riscv_hwprobe *pair,
|
||||
const struct cpumask *cpus)
|
||||
{
|
||||
|
@ -146,6 +170,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
|
|||
|
||||
break;
|
||||
|
||||
case RISCV_HWPROBE_KEY_CPUPERF_0:
|
||||
pair->value = hwprobe_misaligned(cpus);
|
||||
break;
|
||||
|
||||
/*
|
||||
* For forward compatibility, unknown keys don't fail the whole
|
||||
* call, but get their element key set to -1 and value set to 0
|
||||
|
|
Loading…
Reference in New Issue